Example No. 1
def lemmatize(input, multiList=False, cascade=True):
    """Lemmatize the input, removing stop words first when cascade is True."""
    stemmed = []
    if cascade:
        if not multiList:
            filtered = stopwordremover.remove_stop_word(input)
        else:
            filtered = stopwordremover.remove_stop_word(input, True)

        for word in filtered:
            stemmed.append(lemmatizationEngine(word))

        stemmed = list(set(stemmed))
        return stemmed

    else:
        if not multiList:
            lst = normalizer.normalize(input)
        else:
            lst = normalizer.normalize(input, True)

        for word in lst:
            stemmed.append(lemmatizationEngine(word))

        #stemmed = list(set(stemmed))
        return stemmed
Example No. 2
 def test_asciichars(self):
     """Fix ASCII characters"""
     self.assertEqual(norm.normalize("What’s up"), "what is up")
     self.assertEqual(norm.normalize("What's up"), "what is up")
     self.assertEqual(norm.normalize("I said “shut up”"),
                      'I said "shut up"')
     self.assertEqual(norm.normalize("œ"), '')
Example No. 3
 def test_replacesubstitutes(self):
     """should replace subsitutes"""
     self.assertEqual(norm.normalize("Nov 1st I weighed 90 kgs. total"),
                      "November 1st I weighed 90 kilograms total")
     self.assertEqual(
         norm.normalize("I shared it on FB w/ friends, ie: you"),
         "I shared it on Facebook with friends, for example : you")
Example No. 4
def remove_stop_word(input, multiList=False):
    if isinstance(input, str):
        filtered = [
            word for word in normalizer.normalize(input)
            if (word not in stopwords['english'] and not word.isdigit())
        ]
        return filtered

    if not isinstance(input, basestring):
        if multiList:
            for index, lst in enumerate(input):
                input[index] = [
                    word for word in normalizer.normalize(lst)
                    if (word not in stopwords['english'] and not word.isdigit())
                ]
            return input

        else:
            input = [
                word for word in normalizer.normalize(input)
                if (word not in stopwords['english'] and not word.isdigit())
            ]
            return input


#print tokenizer.tokenize('sdfdsf sdffsd sdfsdfds')
#print remove_stop_word('hello i Am mayank. I Am a Good boy')
Example No. 5
 def test_contractions(self):
     """should expand contractions"""
     self.assertEqual(norm.normalize("I'm on the yelow zebra"),
                      "I am on the yellow zebra")
     self.assertEqual(norm.normalize("I'll listen to y'all"),
                      "I will listen to you all")
     self.assertEqual(norm.normalize("do n't make it right"),
                      "do not make it right")
     self.assertEqual(norm.normalize("it's all good"), "it is all good")
Example No. 6
    def echo(self, data, start, end):
        if (self.audio_file is not None):
            recording = self.asource.read()
        else:
            recording = b''.join(data)
            print("Acoustic Activity at: {0}--{1}".format(start, end))

        print(recording)
        #data = np.array(data)
        #serialized = np.frombuffer(data)

        #print(len(hex_data))
        #print(len(recording))
        normalize(recording)
        pad_tokens('tmp.wav2')
        sample_rate, normalized_signal = wavfile.read('tmp.wav2')
        print(sample_rate)
        print(len(normalized_signal))
        banks = convert_to_mel(normalized_signal)
        banks = np.array(banks)
        Banks = banks.reshape(1, 98, 40, 1)
        #np.save('test.npy', Banks)

        z = self.model.predict(Banks)
        p = z[0].tolist().index(max(z[0]))
        self.recognized_keyword = self.categories[p]
        print(self.recognized_keyword)
        '''
		frame_length, step_size = 16000, 64000
		no_of_shifts = int(64000 / step_size) - int(frame_length / step_size)
		print(no_of_shifts)
		#keyword = None
		prob = 0
		for i in range(0, no_of_shifts):
			l = int(i*320)
			banks = convert_to_mel(normalized_signal[l:l+frame_length])
			banks = np.array(banks)
			np.save('test.npy', banks)
			#banks = np.load('sd.npy')
			Banks = banks.reshape(1, 98, 40, 1)
			z = self.model.predict(Banks)
			print(z)
			p = z[0].tolist().index(max(z[0]))
			prob += p
			self.recognized_keyword = self.categories[p]
			#self.recognized_keyword = self.categories[np.argmax(z[0], -1)]
			#com = self.commands[p]
			print(self.recognized_keyword)
		final = int(np.ceil(prob/no_of_shifts))
		print(prob/no_of_shifts)
		print(final)
		self.recognized_keyword = self.categories[final]'''

        K.clear_session()
        os.remove('tmp.wav2')
        os.remove('tmp.wav')
Example No. 7
def load_problem(dom_name):
    print "Parsing..."
    p = parser.Problem(dom_name)
    p.max_faults = -1

    print "Normalizing..."
    for a in p.actions:
        normalize(a)

    print "Ready!"

    return p
Example No. 8
def merge_clauses(sentences):

    """
    этот метод получает json в виде списка "язык-предлоэение"
    {
        'ru':russian_sentence
        'en':english_sentence
    }

    возращает список клауз + код ответа
    {
        'clauses':
        [{'ru':rus_clause_i.'en':eng_clause_i}]
        [{'ru':rus_clause_i.'en':eng_clause_i}]
        [{'ru':rus_clause_i.'en':eng_clause_i}]
        response:
            {code:0,
            description:''}

    }

    """
    input = json.loads(sentences)

    sent_rus = input['ru']
    sent_en = input['en']

    sent1 = json.loads(split_to_clauses(json.dumps({'ru':sent_rus})))
    sent2 = json.loads(split_to_clauses(json.dumps({'en':sent_en})))

    checking = check_stream(sent1, sent2)


    if checking['code'] == 1:
        zipped_clauses =  zip(sent1['clauses'], sent2['clauses'])
    else:
        zipped_clauses = None

    # for now zipped_clauses[i][0] holds the Russian clauses and zipped_clauses[i][1] the English ones
    # eventually this method should be generalized to arbitrary language pairs

    # initialize the output variable
    output = {'clauses':[],'response':checking}

    if(zipped_clauses):
        for pairs in zipped_clauses:
            output['clauses'].append({'ru':normalizer.normalize(pairs[0]),'en':normalizer.normalize(pairs[1])})
    else:
        output['clauses'] = None

    print json.dumps(output)
    return json.dumps(output)
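A minimal usage sketch for the contract described in the docstring above; the sample sentences are invented, and split_to_clauses, check_stream and normalizer come from the surrounding module:

sentences = json.dumps({
    'ru': 'Когда начался дождь, мы пошли домой.',
    'en': 'When the rain started, we went home.'
})
result = json.loads(merge_clauses(sentences))
# result['clauses']  -> list of {'ru': ..., 'en': ...} pairs, or None when check_stream reports a mismatch
# result['response'] -> the {'code': ..., 'description': ...} dict produced by check_stream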
Example No. 9
def evaluate(model, dev_data, loss_fn, save=False):
    print("Running evaluation...")

    model.eval()
    length = len(dev_data)

    # loss metrics
    l2_loss_fn = loss_fns.L2Loss()
    all_loss = []
    l2_losses = []
    for t, (x, y) in enumerate(dev_data):
        x_copy = np.copy(x.numpy())
        x_var = Variable(normalize(x).permute(0, 3, 1, 2)).type(dtype)
        y_var = Variable(normalize(y).permute(0, 3, 1, 2)).type(dtype)

        scores, _, C, M1, M2, res_img1, res_img2 = model(x_var)
        if (t >= length - 2 and save):
            extra = results_folder + "extra/"
            os.makedirs(extra, exist_ok=True)
            for i in range(NUM_SAVED_SAMPLES):
                name = results_folder + "{}_{}_".format(t, i)
                convert_and_save(name + "gen.png", scores[i])
                convert_and_save(name + "gold.png", y_var[i])
                try:
                    convert_and_save(extra + "resgen1.png", res_img1[i])
                    convert_and_save(extra + "resgen2.png", res_img2[i])
                except Exception:
                    print(traceback.format_exc())

                # np.save(name + 'C', C.data.cpu().numpy())
                try:
                    np.save(extra + 'M1', M1.data.cpu().numpy())
                    np.save(extra + 'M2', M2.data.cpu().numpy())
                except Exception:
                    print(traceback.format_exc())
                # convert_and_save(name + "__Cx.png", )
                x_res = x_copy[i]
                try:
                    imsave(extra + "orig_0.png", x_res[:, :, :3])
                    imsave(extra + "orig_1.png", x_res[:, :, 3:])
                except Exception:
                    print(traceback.format_exc())

        all_loss.append(calculate_norm_loss(x_var, y_var, scores, loss_fn))
        l2_losses.append(calculate_norm_loss(x_var, y_var, scores, l2_loss_fn))

    total_loss = sum(all_loss) / len(all_loss)
    total_l2_loss = sum(l2_losses) / len(l2_losses)
    print("Eval norm l2 loss: %.4f, norm total loss: %.4f" % (total_l2_loss, total_loss))
    return total_loss
Example No. 10
    def _load(self):
        """
        Validates and normalizes Batch data
        Updates member `load_status` with `OK`, `BATCH_NO_DATA`, `BATCH_NOT_VALID` or `BATCH_NOT_NORMALIZED`
        :return: None
        """
        if self._data is None:
            logging.info('No data was found')
            self.load_status = BATCH_NO_DATA
            return

        status, message = validate(self._data)

        if status != OK:
            logging.info('Validation failed : ' + message)
            self.load_status = BATCH_NOT_VALID
            return

        self.name = self._data['name']
        self.icon_path = parse(self._data['icon_path'])
        tags, tasks, status = normalize(self._data)

        if status != OK:
            logging.info('Batch normalization failed')
            self.load_status = BATCH_NOT_NORMALIZED
            return

        self.tags = tags
        self.tasks = tasks
        self.load_status = OK
Example No. 11
	def train(self,X,y): 
		theta0 = self.roll(self.theta)
		X,self.mean,self.std = normalize(X)
		self.nTrainingExamples = X.shape[0]
		results = minimizer(lambda x: self.cost_function(X,y,x),theta0,approx_grad = False)
		self.theta = self.unroll(self.theta,results[0])
		return results
Example No. 12
 def react(self, status):
     from_user = status.author.screen_name
     text = N.normalize(status.text)
     reply = self.lang.gen(text)
     print(from_user, text, reply)
     reply = "@{} {}".format(from_user, reply)[0:140]
     self.api.update_status(reply, status.id_str)
Example No. 13
def process():

    # Read the form data from the HTTP request
    text = request.form.get("text", "")

    # Preprocess the text
    text = preprocess(text)

    # Tag the text
    text = tag(text, "http://localhost:7000")

    # Chunk the text
    text = chunk(text)

    # Normalize the text
    text = normalize(text)

    # Build a JSON HTTP response containing the processed text
    return jsonify({
        "status": "success",
        "message": "Request successful",
        "data": {
            "text": text
        }
    })
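A hedged sketch of how a Flask view like this could be exercised with the test client; the route path and the `app` object are assumptions, not part of the snippet, and the tagging service at http://localhost:7000 would need to be running:

# hypothetical: assumes the view above is registered as @app.route("/process", methods=["POST"])
with app.test_client() as client:
    resp = client.post("/process", data={"text": "some raw input text"})
    print(resp.get_json()["data"]["text"])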
Example No. 14
    def test_normalizer(self):
        norm_file = csv_functions.csv_open('test_norm.csv')
        expected = [[100.0, 0], [90.0, 5], [80.0, 7.5], [70.0, 25], [60.0, 40],
                    [50.0, 50], [40.0, 40], [30.0, 22.5], [20.0, 17.5],
                    [10.0, 7.5], [0.0, 2.5]]
        actual = csv_functions.csv_open('test_1.csv')
        actual = pixel_to_embryo_length.pixel_to_embryo_length(actual)
        normalizer.normalize(actual, norm_file)
        self.assertEqual(expected, actual)

        expected = [[100.0, 0], [90.0, 4], [80.0, 6], [70.0, 20], [60.0, 32],
                    [50.0, 40], [40.0, 32], [30.0, 18], [20.0, 14], [10.0, 6],
                    [0.0, 2]]
        actual = csv_functions.csv_open('test_2.csv')
        actual = pixel_to_embryo_length.pixel_to_embryo_length(actual)
        normalizer.normalize(actual, norm_file)
        self.assertEqual(expected, actual)
Example No. 15
    def parse(self):
        t = tokenizer.Tokenizer()
        for word in t.get_tokens(normalize(self.file_name)):
            self.process(word)
        if self.save:
            self.dictionary.save()

        return 0
Example No. 16
def is_subset(a, b):
    '''
    Parameters a and b are expressions given as strings (or already-parsed objects).
    Returns True if a ⊆ b, False if a ⊈ b.
    '''
    import parser
    import normalizer

    if type(a) is str:
        a = parser.parse(a)
    a = normalizer.normalize(a)

    if type(b) is str:
        b = parser.parse(b)
    b = normalizer.normalize(b)

    return _is_subset(a, b)
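For context, a hypothetical call; the concrete expression grammar accepted by parser.parse is project-specific, so the strings below are only placeholders:

# placeholder expressions; parser.parse defines the real syntax
a = "x & y"   # intended to denote the intersection of x and y
b = "x"
print(is_subset(a, b))   # expected True if (x & y) ⊆ x under the project's semantics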
Example No. 17
def train(model, loss_fn, optimizer, train_data, num_epochs = 1):
    for epoch in range(num_epochs):
        print('Starting epoch %d / %d...' % (epoch + 1, num_epochs))
        model.train()
        for t, (x, y) in enumerate(train_data):
            x_var = Variable(normalize(x).permute(0,3,1,2)).type(dtype)
            y_var = Variable(normalize(y).permute(0,3,1,2)).type(dtype)

            scores = model(x_var)
            
            loss = loss_fn(scores, y_var)
            if (t + 1) % PRINT_EVERY == 0:
                print('\tt = %d, loss = %.4f' % (t + 1, loss.data[0]))

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
Example No. 18
def remove_stop_word(input, multiList=False):
    if isinstance(input, str):
        filtered = [word for word in normalizer.normalize(input) if (word not in stopwords['english'] and not word.isdigit())]
        return filtered

    if not isinstance(input, basestring):
        if multiList:
            for index, lst in enumerate(input):
                input[index] = [word for word in normalizer.normalize(lst) if (word not in stopwords['english'] and not word.isdigit())]
            return input

        else:
            input = [word for word in normalizer.normalize(input) if (word not in stopwords['english'] and not word.isdigit())]
            return input


#print tokenizer.tokenize('sdfdsf sdffsd sdfsdfds')
#print remove_stop_word('hello i Am mayank. I Am a Good boy')
Example No. 19
def read_url(url):

    checked_links.append(url)

    url = n.normalize(url, main_url_domain, main_url_ext)

    # check normalizer.py mailto: condition
    print("Fetching page at {}...".format(url), end='')
    if url is not None:
        try:
            url_request = requests.get(url)
        except Exception:
            print("Could not read url...")
            return None
        print("...done")

        if url != main_url:
            print("Checking: ", url)
            url_domain = s.extract(url)["url_domain"]
        else:
            url_domain = main_url_domain

        is_ok = True

        if url_request.status_code >= 400:

            broken_links.append(url)
            is_ok = False

            write_broken = url + "," + str(url_request.status_code) + "\n"
            broken_file.write(write_broken)
            print("* Broken url: ", url)
            print("")
            return None

        soup = BeautifulSoup(url_request.content,
                             "html.parser",
                             from_encoding="iso-8859-1")

        print("Looking for links on the webpage...", end='')
        url_list = soup.find_all('a', href=True)
        print("...done")
        print("")

        write_checked = url + "," \
            + str(url_request.status_code) + "," + str(is_ok) + "\n"

        checked_file.write(write_checked)

        if url_domain == main_url_domain:
            for link in url_list:
                if not link['href']:
                    continue

                if link['href'] not in checked_links:
                    read_url(link['href'])
Example No. 20
def pos_tags_count(text):
    pos_counts = {}
    parsed_tokens = normalize(text)
    pos_tags = get_only_pos(parsed_tokens)
    unique_tags = ['VERB','ADJ','NOUN','ADV','NUM','SCONJ','CCONJ','CONJ']
    for tag in unique_tags:
        pos_counts[tag] = pos_tags.count(tag)/len(pos_tags)
    pos_counts['CONJ'] = pos_counts['SCONJ']+pos_counts['CCONJ']
    del pos_counts['SCONJ']
    del pos_counts['CCONJ']
    return pos_counts
Example No. 21
def team_rate_extractor(local=False):
    soup = get_html_soup(LOCAL_PATH, LINK, local)

    con = soup.find(id='pageContent').find(attrs={'class': 'content'})
    tables = con.find_all('table')

    data = []
    for t in tables:
        data.extend(parse_table(t))

    teamrating = []
    for team in data:
        if len(team) > 0:
            if len(team) >= 5:
                teamrating.append([normalize(team[-5]), team[-1]])
            else:
                teamrating.append([normalize(team[-2]), team[-1]])
    teamrating = sorted(teamrating, key=lambda x: float(x[1]), reverse=True)
    teamrating = list(zip(*teamrating))[0]
    return teamrating
Example No. 22
 def handle(self):
     global counter
     data = bytes.decode(self.request[0].strip())
     socket = self.request[1]
     print("%s : " % self.client_address[0], str(data))
     body = normalize(str(data))
     today = body['dt'].strftime('%Y-%m-%d')
     result = es.index(index=today, doc_type='event', body=body)
     if not result['created']:
         logging.info(str(data))
     counter += 1
     print("Got", counter, "messages")
Example No. 23
def train(model, loss_fn, optimizer, train_data, val_data, num_epochs=1):
    losses = []
    eval_losses = []
    optimizer = optim.Adam(model.parameters(),
                           lr=INIT_LR * 10 ** -4)  # slow start (to prevent blowup)
    for epoch in range(num_epochs):
        print('Starting epoch %d / %d...' % (epoch + 1, num_epochs))
        model.train()
        if epoch == 6:
            print("Lowering rate for refinement")
            optimizer = optim.Adam(model.parameters(), lr=INIT_LR / 10)
        if epoch == 11:
            print("Lowering rate for refinement 2")
            optimizer = optim.Adam(model.parameters(), lr=INIT_LR / 100)
        for t, (x, y) in enumerate(train_data):
            if epoch == 0 and t == 50:
                optimizer = optim.Adam(model.parameters(), lr=INIT_LR)
            # print(t)
            x_var = Variable(normalize(x).permute(0, 3, 1, 2)).type(dtype)
            y_var = Variable(normalize(y).permute(0, 3, 1, 2)).type(dtype)

            scores, oob_loss, _, _, _, _, _ = model(x_var)

            loss = loss_fn(scores, y_var)
            if (t + 1) % PRINT_EVERY == 0:
                norm_loss = calculate_norm_loss(x_var, y_var, scores, loss_fn)
                losses.append(norm_loss)
                print('\ttraining: t = %d, loss = %.4f, norm_loss= %.4f' % (
                      t + 1, loss.data[0], norm_loss))
            if not is_local and t % (len(train_data) // 8) == 0 or overfit_small:
                eval_loss = evaluate(model, val_data, loss_fn)
                eval_losses.append(eval_loss)

            optimizer.zero_grad()
            (loss + oob_loss).backward()
            optimizer.step()

    os.makedirs("losses", exist_ok=True)
    np.save(results_folder + 'losses' + NAME, np.array(losses))
    np.save(results_folder + 'losses/eval_losses' + NAME, np.array(eval_losses))
Example No. 24
def eval(model, dev_data, loss_fn):
    print("Running evaluation...")
    total_loss = 0.0
    model.eval()
    length = len(dev_data)
    for t, (x, y) in enumerate(dev_data):
        x_var = Variable(normalize(x).permute(0,3,1,2)).type(dtype)
        y_var = Variable(normalize(y).permute(0,3,1,2)).type(dtype)
        
        scores = model(x_var)
        if (t == length-1):
            for i in range(NUM_SAVED_SAMPLES):
                name = "./eval/{}_{}_".format(t, i)
                imsave(name + "gen.png", np.transpose(denorm(scores[i].data.cpu().numpy()), axes=[1,2,0]))
                imsave(name + "gold.png", np.transpose(denorm(y_var[i].data.cpu().numpy()), axes=[1,2,0]))
                x = x_var[i].data.cpu().numpy()
                imsave(name + "orig_0.png", x[:3,:,:])
                imsave(name + "orig_1.png", x[3:,:,:])
        
        total_loss += loss_fn(scores, y_var).data[0]

    print("Total eval loss: %.4f, Avg eval loss: %.4f" % (total_loss, total_loss / NUM_VAL))
Example No. 25
    def test_can_normalize_data(self):
        lines = load_test_data('weather.dat')
        normal = normalize(lines)
        first_measurement = normal[0]
        last_measurement = normal[-1]
        self.assertEqual(1, first_measurement.day)
        self.assertEqual(59, first_measurement.min)
        self.assertEqual(88, first_measurement.max)
        self.assertEqual(29, first_measurement.delta())

        self.assertEqual(30, last_measurement.day)
        self.assertEqual(45, last_measurement.min)
        self.assertEqual(90, last_measurement.max)
        self.assertEqual(45, last_measurement.delta())
Example No. 26
    def calculate_score(input_data):
        result = COEFS.copy()

        for i in range(len(result)):
            result[i].append(input_data[i])

        for idx, row in enumerate(COEFS):
            result[idx].append(row[2] * row[-1])

        score = INTERCEPT
        for row in result:
            score += row[-1]

        return normalize(score)
Example No. 27
def team_extractor(local=False):
    soup = get_html_soup(LOCAL_PATH, LINK, local)

    con = soup.find(id='pageContent').find(attrs={'class': 'content'})
    tables = con.find_all('table')

    data = []
    for t in tables:
        data.extend(parse_table(t))

    teams = {}
    for team in data:
        if len(team) > 0:
            teams[normalize(team[-4])] = [team[-1], team[-2], team[-3]]
    return teams
Example No. 28
    def weigh_match(self, pair):
        init_str = pair[1]
        query_str, completion_str = normalize(pair)

        # skip if there are no completions or the query is not made of letters
        if completion_str == 'NULL' or re.fullmatch(r'\W+', query_str):
            return False

        compare_obj = Compare(query_str, completion_str, init_str)
        Compare.calculate_weight(compare_obj)
        query_weight = compare_obj.max_obj.weight

        # keep pairs within the desired Levenshtein distance
        if 0 <= query_weight <= 2:
            self.light_match = compare_obj.max_obj
        else:
            self.light_match = None
Example No. 29
def sentences_to_indices(X, word_to_index, max_len):

    m = X.shape[0]

    X_indices = np.zeros((m, max_len), dtype=int)

    for i in range(m):

        sentence_words = normalize(X[i]).split()

        j = 0
        for w in sentence_words:
            if w in word_to_index:
                X_indices[i, j] = word_to_index[w]
            j = j + 1

    return X_indices
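A toy illustration; the vocabulary is invented, and the exact indices depend on what the project's normalize() returns:

import numpy as np

word_to_index = {'i': 1, 'love': 2, 'you': 3}          # toy vocabulary
X = np.array(["I love you", "love you"])
print(sentences_to_indices(X, word_to_index, max_len=4))
# e.g. [[1 2 3 0]
#       [2 3 0 0]]  assuming normalize() lowercases and strips punctuation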
Example No. 30
def read_url(url):

    global count

    url = n.normalize(url, main_url_domain, main_url_ext)

    #check normalizer.py mailto: condition
    if url is not None:
        url_request = requests.get(url)

        count += 1
        print(count)
        url_domain = s.extract(url)["url_domain"]

        is_ok = True

        if url_request.status_code >= 400:

            broken_links.append(url)
            is_ok = False

            write_broken = url + "," + str(url_request.status_code) + "\n"
            broken_file.write(write_broken)

        print(url_request.status_code)
        soup = BeautifulSoup(url_request.content,
                             "html.parser",
                             from_encoding="iso-8859-1")

        url_list = soup.find_all('a', href=True)
        checked_links.append(url)

        write_checked = str(count) + "," + url + "," + str(
            url_request.status_code) + "," + str(is_ok) + "\n"
        checked_file.write(write_checked)

        if url_domain == main_url_domain:

            for link in url_list:

                if link['href'] not in checked_links:
                    print(link['href'])
                    read_url(link['href'])
Example No. 31
 def run_episode(self,
                 env,
                 normalizer,
                 addOrSubtractOperator,
                 delta=None,
                 render=False):
     """Gets the total reward for an episode"""
     total_reward = 0
     state = env.reset()
     for episode_number in range(self.options['MAX_EPISODES']):
         if render:
             env.render()
         normalizer.observe(state)
         state = normalizer.normalize(state)
         action = self.policy(state, addOrSubtractOperator, delta)
         state, reward, done, info = env.step(action)
         reward = max(min(reward, 1), -1)
         total_reward += reward
         if done:
             break
     env.env.close()
     return total_reward
Example No. 32
def scoreboardextractor(local=False):
    soup = get_html_soup(LOCAL_PATH, LINK, local)

    con = soup.find('table')
    rows = con.find('tbody').find_all('td')

    scoreboard = []
    for row in rows:
        if row.text and '1' not in row.text and len(
                row.text) > 4 and 'tries' not in row.text:
            text = row.text
            for key in REGIONS:
                if text.startswith(key) and not REGIONS[key]:
                    # print(text)
                    text = text[len(key):]
                    REGIONS[key] = True

            scoreboard.append(normalize(text))

    # for i in range(len(scoreboard)):
    #     print(i, scoreboard[i])
    return scoreboard
Example No. 33
def calculate_score(input_data):
    """
    Given the intercept and the model coefficients, this function calculates
    the score of an input.
    """
    result = COEFS.copy()
    # find answers that were true and flag as 1, 0 otherwise
    for idx, row in enumerate(COEFS):
        if row[1] in input_data.keys():
            result[idx].append(1)
        else:
            result[idx].append(0)

    # multiply the flag by the coefficient to get the points
    for idx, row in enumerate(COEFS):
        result[idx].append(row[2] * row[-1])

    # sum all points and the intercept
    score = INTERCEPT
    for row in result:
        score += row[-1]

    return normalize(score)
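A worked sketch of the arithmetic with made-up coefficients; the real COEFS and INTERCEPT are defined elsewhere in the project:

# hypothetical rows of the form [index, answer_key, coefficient]
COEFS = [[0, 'smoker', 1.5], [1, 'exercises', -0.7]]
INTERCEPT = 0.3
input_data = {'smoker': True}
# flags:  'smoker' -> 1, 'exercises' -> 0
# points: 1 * 1.5 = 1.5 and 0 * -0.7 = 0.0
# score = 0.3 + 1.5 + 0.0 = 1.8, which is then passed to normalize()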
Example No. 34
def do_predict_multiple(data):
    output = {}

    count = 0

    for l in xrange(len(data) - 1, MIN_LENGTH, -1):
    # for l in xrange(MIN_LENGTH, len(data)):
        for i in xrange(len(data) - l + 1 - 1, -1, -1):
            if count > max_analysis_count:
                break

            current = normalizer.normalize(np.array(data)[i:i+l])
            result = model.predict(current)[0]
            result = LABEL_LIST[int(result)]

            if result not in output:
                output[result] = 1
            else:
                output[result] += 1

            if result != 'random' and result != 'horizontal' and result != 'vertical':
                count += 1

    return output
Example No. 35
def process_data_row(label, file_name):
    data = np.genfromtxt(file_name, delimiter=', ', dtype = int)
    data = normalizer.normalize(data)

    X.append(data)
    y.append(LABELS[label])
Example No. 36
expected_return = expected_annual_return / (365.0 / days_owned) #Percent
price -= upcoming_dividend

broker_cut = (7.95 + 0.75 * contracts_purchased) / (contracts_purchased * 100)
if contracts_purchased == 10: # A hacky way for representing that "10" contracts purchased is just my way of dividing everything by 10
    broker_cut = 0.087 / 10

beta = 1
end_price_list = np.random.gamma(price, beta, 10000)


# Normalize the list by looping through normalization methods to get (a) desired [expected] standard deviation and (b) desired [expected] average return
total_range = 0
for i in range(1,10000,1):
    step = i / 10.0
    temp_end_price_list = normalize(end_price_list, minimum = 0, total_range = step)
    std_dev_actual = (np.std(map(lambda x: x - price, temp_end_price_list))/price * 100)
    if abs(std_dev_actual - desired_stddev) < 0.1: #.1%
        print "Using total range of %.2f, actual std dev is %.2f, desired std dev is %.2f" % (step, std_dev_actual, desired_stddev)
        end_price_list = temp_end_price_list
        total_range = step
        break 

start_step = 1
if desired_stddev > 16:
    start_step = -10000
if desired_stddev > 12:
    start_step = -6000
elif desired_stddev > 9:
    start_step = -2000
for i in range(start_step,10000,1):
Example No. 37
		pluscomment = pluscomment.replace("※", "")
		pluscomment = pluscomment.replace("∴", "")
		pluscomment = pluscomment.replace("*", "")
		pluscomment = pluscomment.replace("+", "")
		pluscomment = pluscomment.replace("・", "")
		pluscomment = pluscomment.replace("°", "")
		pluscomment = pluscomment.replace("w", "")
		"""
		pluscomment = pluscomment.replace("null", "")
		pluscomment = pluscomment.replace("\n", "")
		pluscomment = pluscomment.replace("\t", "")
		pluscomment = pluscomment.replace(" ", "")
		pluscomment = pluscomment.replace(" ", "")
		pluscomment = pluscomment.replace("ぁ", "あ")
		pluscomment = re.sub(re.compile("[!-/:-@[-`{-~]"), '', pluscomment)
		pluscomment = normalize(pluscomment.decode("utf-8"))
		pluscomment = pluscomment.replace(u" ", "")
		pluscomment = pluscomment.replace(u" ", "")
		# handle shouted comments: collapse repeated long-vowel marks
		pluscomment = pluscomment.replace(u"ーー",u"ー")
		if pluscomment != '':
			pluscomment = tagger.parse(pluscomment.encode("utf-8"))
			#pluscomment = pluscomment.replace("\n"," ")
			pluscomment = pluscomment.replace("  "," ")
			fo.write(pluscomment)
	thread[ID][j]["comment"] = commenttext
	fo.write("\n")
	fo.close()

files = os.listdir('../data/tcserv.nii.ac.jp/access/[email protected]/832c5b059b15f647/nicocomm/data/video')
for nfile in files[1:2]:
Example No. 38
tagger = MeCab.Tagger( '-Owakati -u /usr/local/Cellar/mecab/0.996/lib/mecab/dic/ipadic/ruiter-keyword.dic, /usr/local/Cellar/mecab/0.996/lib/mecab/dic/ipadic/wikipedia-keyword.dic,/usr/local/Cellar/mecab/0.996/lib/mecab/dic/ipadic/hatena-keyword.dic')
#commentfiles = os.listdir('comment')
for j in thread.keys():
	filename = ("businesstexts/" + str(j) + ".txt")
	fo = file(filename,'w')
	#filename = ("comment2_" + ID + "/" + "sm20158." +"txt")
	commenttext = ''
	text = thread[j]['bodyText']
	if ".T)" in text:
		point = text.find(".T)")
		meigararmei =  text[(point-14):(point + 3)]
		text = text.replace(meigararmei,"")
	sentence = text.replace("。"," ")
	if sentence != '':
		sentence = unicodedata.normalize("NFKC", "".join(unicodedata.normalize("NFKC", sentence.decode("utf-8")).split()))
		sentence = normalize(sentence)
		sentence = sentence.lower()
		sentence = re.sub(re.compile("[!-/:-@[-`{-~]"), '', sentence.encode("utf-8"))
		sentence = sentence.replace(" ", "")
		sentence = sentence.replace(" ", "")
		sentence = sentence.replace("、", "")
		sentence = tagger.parse(sentence)
		fo.write(sentence)
	else:
		continue
	fo.write("\n")
	fo.close()


filename = ("allbuisinessnews.txt")
fo = file(filename,'w')
Example No. 39
def do_predict_single(data):
    current = normalizer.normalize(np.array(data))
    result = model.predict(current)[0]
    result = LABEL_LIST[int(result)]
    return { result : 1 }