Example No. 1
def lemmatize(input, multiList=False, cascade=True):
    """Lemmatize the input, removing stop words first when cascade is True."""
    stemmed = []
    if cascade:
        if not multiList:
            filtered = stopwordremover.remove_stop_word(input)
        else:
            filtered = stopwordremover.remove_stop_word(input, True)

        for word in filtered:
            stemmed.append(lemmatizationEngine(word))

        stemmed = list(set(stemmed))
        return stemmed

    else:
        if not multiList:
            lst = normalizer.normalize(input)
        else:
            lst = normalizer.normalize(input, True)

        for word in lst:
            stemmed.append(lemmatizationEngine(word))

        #stemmed = list(set(stemmed))
        return stemmed
Example No. 2
 def test_asciichars(self):
     """Fix ASCII characters"""
     self.assertEqual(norm.normalize("What’s up"), "what is up")
     self.assertEqual(norm.normalize("What's up"), "what is up")
     self.assertEqual(norm.normalize("I said “shut up”"),
                      'I said "shut up"')
     self.assertEqual(norm.normalize("œ"), '')
Example No. 3
 def test_replacesubstitutes(self):
     """should replace subsitutes"""
     self.assertEqual(norm.normalize("Nov 1st I weighed 90 kgs. total"),
                      "November 1st I weighed 90 kilograms total")
     self.assertEqual(
         norm.normalize("I shared it on FB w/ friends, ie: you"),
         "I shared it on Facebook with friends, for example : you")
Example No. 4
def remove_stop_word(input, multiList=False):
    if isinstance(input, str):
        filtered = [
            word for word in normalizer.normalize(input)
            if (word not in stopwords['english'] and not word.isdigit())
        ]
        return filtered

    if not isinstance(input, basestring):
        if multiList:
            for index, lst in enumerate(input):
                input[index] = [
                    word for word in normalizer.normalize(lst)
                    if (word not in stopwords['english'] and not word.isdigit())
                ]
            return input

        else:
            input = [
                word for word in normalizer.normalize(input)
                if (word not in stopwords['english'] and not word.isdigit())
            ]
            return input


#print tokenizer.tokenize('sdfdsf sdffsd sdfsdfds')
#print remove_stop_word('hello i Am mayank. I Am a Good boy')
Example No. 5
 def test_contractions(self):
     """should expand contractions"""
     self.assertEqual(norm.normalize("I'm on the yelow zebra"),
                      "I am on the yellow zebra")
     self.assertEqual(norm.normalize("I'll listen to y'all"),
                      "I will listen to you all")
     self.assertEqual(norm.normalize("do n't make it right"),
                      "do not make it right")
     self.assertEqual(norm.normalize("it's all good"), "it is all good")
Example No. 6
    def echo(self, data, start, end):
        if (self.audio_file is not None):
            recording = self.asource.read()
        else:
            recording = b''.join(data)
            print("Acoustic Activity at: {0}--{1}".format(start, end))

        print(recording)
        #data = np.array(data)
        #serialized = np.frombuffer(data)

        #print(len(hex_data))
        #print(len(recording))
        normalize(recording)
        pad_tokens('tmp.wav2')
        sample_rate, normalized_signal = wavfile.read('tmp.wav2')
        print(sample_rate)
        print(len(normalized_signal))
        banks = convert_to_mel(normalized_signal)
        banks = np.array(banks)
        Banks = banks.reshape(1, 98, 40, 1)
        #np.save('test.npy', Banks)

        z = self.model.predict(Banks)
        p = z[0].tolist().index(max(z[0]))
        self.recognized_keyword = self.categories[p]
        print(self.recognized_keyword)
        '''
		frame_length, step_size = 16000, 64000
		no_of_shifts = int(64000 / step_size) - int(frame_length / step_size)
		print(no_of_shifts)
		#keyword = None
		prob = 0
		for i in range(0, no_of_shifts):
			l = int(i*320)
			banks = convert_to_mel(normalized_signal[l:l+frame_length])
			banks = np.array(banks)
			np.save('test.npy', banks)
			#banks = np.load('sd.npy')
			Banks = banks.reshape(1, 98, 40, 1)
			z = self.model.predict(Banks)
			print(z)
			p = z[0].tolist().index(max(z[0]))
			prob += p
			self.recognized_keyword = self.categories[p]
			#self.recognized_keyword = self.categories[np.argmax(z[0], -1)]
			#com = self.commands[p]
			print(self.recognized_keyword)
		final = int(np.ceil(prob/no_of_shifts))
		print(prob/no_of_shifts)
		print(final)
		self.recognized_keyword = self.categories[final]'''

        K.clear_session()
        os.remove('tmp.wav2')
        os.remove('tmp.wav')
Example No. 7
def load_problem(dom_name):
    print "Parsing..."
    p = parser.Problem(dom_name)
    p.max_faults = -1

    print "Normalizing..."
    for a in p.actions:
        normalize(a)

    print "Ready!"

    return p
Example No. 8
def merge_clauses(sentences):

    """
    этот метод получает json в виде списка "язык-предлоэение"
    {
        'ru':russian_sentence
        'en':english_sentence
    }

    возращает список клауз + код ответа
    {
        'clauses':
        [{'ru':rus_clause_i.'en':eng_clause_i}]
        [{'ru':rus_clause_i.'en':eng_clause_i}]
        [{'ru':rus_clause_i.'en':eng_clause_i}]
        response:
            {code:0,
            description:''}

    }

    """
    input = json.loads(sentences)

    sent_rus = input['ru']
    sent_en = input['en']

    sent1 = json.loads(split_to_clauses(json.dumps({'ru':sent_rus})))
    sent2 = json.loads(split_to_clauses(json.dumps({'en':sent_en})))

    checking = check_stream(sent1, sent2)


    if checking['code'] == 1:
        zipped_clauses =  zip(sent1['clauses'], sent2['clauses'])
    else:
        zipped_clauses = None

    # for now zipped_clauses[i][0] holds the Russian clauses and zipped_clauses[i][1] the English ones
    # eventually this method should be generalized to arbitrary language pairs

    # initialize the output variable
    output = {'clauses':[],'response':checking}

    if(zipped_clauses):
        for pairs in zipped_clauses:
            output['clauses'].append({'ru':normalizer.normalize(pairs[0]),'en':normalizer.normalize(pairs[1])})
    else:
        output['clauses'] = None

    print json.dumps(output)
    return json.dumps(output)
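A minimal usage sketch for the contract described in the docstring above; the sample sentences are invented, and split_to_clauses, check_stream and normalizer come from the surrounding module:

sentences = json.dumps({
    'ru': 'Когда начался дождь, мы пошли домой.',
    'en': 'When the rain started, we went home.'
})
result = json.loads(merge_clauses(sentences))
# result['clauses']  -> list of {'ru': ..., 'en': ...} pairs, or None when check_stream reports a mismatch
# result['response'] -> the {'code': ..., 'description': ...} dict produced by check_stream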
Example No. 9
def evaluate(model, dev_data, loss_fn, save=False):
    print("Running evaluation...")

    model.eval()
    length = len(dev_data)

    # loss metrics
    l2_loss_fn = loss_fns.L2Loss()
    all_loss = []
    l2_losses = []
    for t, (x, y) in enumerate(dev_data):
        x_copy = np.copy(x.numpy())
        x_var = Variable(normalize(x).permute(0, 3, 1, 2)).type(dtype)
        y_var = Variable(normalize(y).permute(0, 3, 1, 2)).type(dtype)

        scores, _, C, M1, M2, res_img1, res_img2 = model(x_var)
        if (t >= length - 2 and save):
            extra = results_folder + "extra/"
            os.makedirs(extra, exist_ok=True)
            for i in range(NUM_SAVED_SAMPLES):
                name = results_folder + "{}_{}_".format(t, i)
                convert_and_save(name + "gen.png", scores[i])
                convert_and_save(name + "gold.png", y_var[i])
                try:
                    convert_and_save(extra + "resgen1.png", res_img1[i])
                    convert_and_save(extra + "resgen2.png", res_img2[i])
                except Exception:
                    print(traceback.format_exc())

                # np.save(name + 'C', C.data.cpu().numpy())
                try:
                    np.save(extra + 'M1', M1.data.cpu().numpy())
                    np.save(extra + 'M2', M2.data.cpu().numpy())
                except Exception:
                    print(traceback.format_exc())
                # convert_and_save(name + "__Cx.png", )
                x_res = x_copy[i]
                try:
                    imsave(extra + "orig_0.png", x_res[:, :, :3])
                    imsave(extra + "orig_1.png", x_res[:, :, 3:])
                except Exception:
                    print(traceback.format_exc())

        all_loss.append(calculate_norm_loss(x_var, y_var, scores, loss_fn))
        l2_losses.append(calculate_norm_loss(x_var, y_var, scores, l2_loss_fn))

    total_loss = sum(all_loss) / len(all_loss)
    total_l2_loss = sum(l2_losses) / len(l2_losses)
    print("Eval norm l2 loss: %.4f, norm total loss: %.4f" % (total_l2_loss, total_loss))
    return total_loss
Example No. 10
    def _load(self):
        """
        Validates and normalizes Batch data
        Updates member `load_status` with `OK`, `BATCH_NO_DATA`, `BATCH_NOT_VALID` or `BATCH_NOT_NORMALIZED`
        :return: None
        """
        if self._data is None:
            logging.info('No data was found')
            self.load_status = BATCH_NO_DATA
            return

        status, message = validate(self._data)

        if status != OK:
            logging.info('Validation failed : ' + message)
            self.load_status = BATCH_NOT_VALID
            return

        self.name = self._data['name']
        self.icon_path = parse(self._data['icon_path'])
        tags, tasks, status = normalize(self._data)

        if status != OK:
            logging.info('Batch normalization failed')
            self.load_status = BATCH_NOT_NORMALIZED
            return

        self.tags = tags
        self.tasks = tasks
        self.load_status = OK
Example No. 11
	def train(self,X,y): 
		theta0 = self.roll(self.theta)
		X,self.mean,self.std = normalize(X)
		self.nTrainingExamples = X.shape[0]
		results = minimizer(lambda x: self.cost_function(X,y,x),theta0,approx_grad = False)
		self.theta = self.unroll(self.theta,results[0])
		return results
Example No. 12
 def react(self, status):
     from_user = status.author.screen_name
     text = N.normalize(status.text)
     reply = self.lang.gen(text)
     print(from_user, text, reply)
     reply = "@{} {}".format(from_user, reply)[0:140]
     self.api.update_status(reply, status.id_str)
Example No. 13
def process():

    # Read the form data from the HTTP request
    text = request.form.get("text", "")

    # Preprocess the text
    text = preprocess(text)

    # Tag the text
    text = tag(text, "http://localhost:7000")

    # Chunk the text
    text = chunk(text)

    # Normalize the text
    text = normalize(text)

    # Build a JSON HTTP response containing the processed text
    return jsonify({
        "status": "success",
        "message": "Request successful",
        "data": {
            "text": text
        }
    })
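A hedged sketch of how a Flask view like this could be exercised with the test client; the route path and the `app` object are assumptions, not part of the snippet, and the tagging service at http://localhost:7000 would need to be running:

# hypothetical: assumes the view above is registered as @app.route("/process", methods=["POST"])
with app.test_client() as client:
    resp = client.post("/process", data={"text": "some raw input text"})
    print(resp.get_json()["data"]["text"])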
Example No. 14
    def test_normalizer(self):
        norm_file = csv_functions.csv_open('test_norm.csv')
        expected = [[100.0, 0], [90.0, 5], [80.0, 7.5], [70.0, 25], [60.0, 40],
                    [50.0, 50], [40.0, 40], [30.0, 22.5], [20.0, 17.5],
                    [10.0, 7.5], [0.0, 2.5]]
        actual = csv_functions.csv_open('test_1.csv')
        actual = pixel_to_embryo_length.pixel_to_embryo_length(actual)
        normalizer.normalize(actual, norm_file)
        self.assertEqual(expected, actual)

        expected = [[100.0, 0], [90.0, 4], [80.0, 6], [70.0, 20], [60.0, 32],
                    [50.0, 40], [40.0, 32], [30.0, 18], [20.0, 14], [10.0, 6],
                    [0.0, 2]]
        actual = csv_functions.csv_open('test_2.csv')
        actual = pixel_to_embryo_length.pixel_to_embryo_length(actual)
        normalizer.normalize(actual, norm_file)
        self.assertEqual(expected, actual)
Example No. 15
    def parse(self):
        t = tokenizer.Tokenizer()
        for word in t.get_tokens(normalize(self.file_name)):
            self.process(word)
        if self.save:
            self.dictionary.save()

        return 0
Example No. 16
def is_subset(a, b):
    '''
    Parameters a and b are expressions given as strings (or already-parsed objects).
    Returns True if a ⊆ b, False if a ⊈ b.
    '''
    import parser
    import normalizer

    if type(a) is str:
        a = parser.parse(a)
    a = normalizer.normalize(a)

    if type(b) is str:
        b = parser.parse(b)
    b = normalizer.normalize(b)

    return _is_subset(a, b)
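For context, a hypothetical call; the concrete expression grammar accepted by parser.parse is project-specific, so the strings below are only placeholders:

# placeholder expressions; parser.parse defines the real syntax
a = "x & y"   # intended to denote the intersection of x and y
b = "x"
print(is_subset(a, b))   # expected True if (x & y) ⊆ x under the project's semantics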
Example No. 17
def train(model, loss_fn, optimizer, train_data, num_epochs = 1):
    for epoch in range(num_epochs):
        print('Starting epoch %d / %d...' % (epoch + 1, num_epochs))
        model.train()
        for t, (x, y) in enumerate(train_data):
            x_var = Variable(normalize(x).permute(0,3,1,2)).type(dtype)
            y_var = Variable(normalize(y).permute(0,3,1,2)).type(dtype)

            scores = model(x_var)
            
            loss = loss_fn(scores, y_var)
            if (t + 1) % PRINT_EVERY == 0:
                print('\tt = %d, loss = %.4f' % (t + 1, loss.data[0]))

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
Example No. 18
def remove_stop_word(input, multiList=False):
    if isinstance(input, str):
        filtered = [word for word in normalizer.normalize(input) if (word not in stopwords['english'] and not word.isdigit())]
        return filtered

    if not isinstance(input, basestring):
        if multiList:
            for index, lst in enumerate(input):
                input[index] = [word for word in normalizer.normalize(lst) if (word not in stopwords['english'] and not word.isdigit())]
            return input

        else:
            input = [word for word in normalizer.normalize(input) if (word not in stopwords['english'] and not word.isdigit())]
            return input


#print tokenizer.tokenize('sdfdsf sdffsd sdfsdfds')
#print remove_stop_word('hello i Am mayank. I Am a Good boy')
Example No. 19
def read_url(url):

    checked_links.append(url)

    url = n.normalize(url, main_url_domain, main_url_ext)

    # check normalizer.py mailto: condition
    print("Fetching page at {}...".format(url), end='')
    if url is not None:
        try:
            url_request = requests.get(url)
        except Exception:
            print("Could not read url...")
            return None
        print("...done")

        if url != main_url:
            print("Checking: ", url)
            url_domain = s.extract(url)["url_domain"]
        else:
            url_domain = main_url_domain

        is_ok = True

        if url_request.status_code >= 400:

            broken_links.append(url)
            is_ok = False

            write_broken = url + "," + str(url_request.status_code) + "\n"
            broken_file.write(write_broken)
            print("* Broken url: ", url)
            print("")
            return None

        soup = BeautifulSoup(url_request.content,
                             "html.parser",
                             from_encoding="iso-8859-1")

        print("Looking for links on the webpage...", end='')
        url_list = soup.find_all('a', href=True)
        print("...done")
        print("")

        write_checked = url + "," \
            + str(url_request.status_code) + "," + str(is_ok) + "\n"

        checked_file.write(write_checked)

        if url_domain == main_url_domain:
            for link in url_list:
                if not link['href']:
                    continue

                if link['href'] not in checked_links:
                    read_url(link['href'])
Example No. 20
def pos_tags_count(text):
    pos_counts = {}
    parsed_tokens = normalize(text)
    pos_tags = get_only_pos(parsed_tokens)
    unique_tags = ['VERB','ADJ','NOUN','ADV','NUM','SCONJ','CCONJ','CONJ']
    for tag in unique_tags:
        pos_counts[tag] = pos_tags.count(tag)/len(pos_tags)
    pos_counts['CONJ'] = pos_counts['SCONJ']+pos_counts['CCONJ']
    del pos_counts['SCONJ']
    del pos_counts['CCONJ']
    return pos_counts
Example No. 21
def team_rate_extractor(local=False):
    soup = get_html_soup(LOCAL_PATH, LINK, local)

    con = soup.find(id='pageContent').find(attrs={'class': 'content'})
    tables = con.find_all('table')

    data = []
    for t in tables:
        data.extend(parse_table(t))

    teamrating = []
    for team in data:
        if len(team) > 0:
            if len(team) >= 5:
                teamrating.append([normalize(team[-5]), team[-1]])
            else:
                teamrating.append([normalize(team[-2]), team[-1]])
    teamrating = sorted(teamrating, key=lambda x: float(x[1]), reverse=True)
    teamrating = list(zip(*teamrating))[0]
    return teamrating
Example No. 22
 def handle(self):
     global counter
     data = bytes.decode(self.request[0].strip())
     socket = self.request[1]
     print("%s : " % self.client_address[0], str(data))
     body = normalize(str(data))
     today = body['dt'].strftime('%Y-%m-%d')
     result = es.index(index=today, doc_type='event', body=body)
     if not result['created']:
         logging.info(str(data))
     counter += 1
     print("Got", counter, "messages")
Example No. 23
def train(model, loss_fn, optimizer, train_data, val_data, num_epochs=1):
    losses = []
    eval_losses = []
    optimizer = optim.Adam(model.parameters(),
                           lr=INIT_LR * 10 ** -4)  # slow start (to prevent blowup)
    for epoch in range(num_epochs):
        print('Starting epoch %d / %d...' % (epoch + 1, num_epochs))
        model.train()
        if epoch == 6:
            print("Lowering rate for refinement")
            optimizer = optim.Adam(model.parameters(), lr=INIT_LR / 10)
        if epoch == 11:
            print("Lowering rate for refinement 2")
            optimizer = optim.Adam(model.parameters(), lr=INIT_LR / 100)
        for t, (x, y) in enumerate(train_data):
            if epoch == 0 and t == 50:
                optimizer = optim.Adam(model.parameters(), lr=INIT_LR)
            # print(t)
            x_var = Variable(normalize(x).permute(0, 3, 1, 2)).type(dtype)
            y_var = Variable(normalize(y).permute(0, 3, 1, 2)).type(dtype)

            scores, oob_loss, _, _, _, _, _ = model(x_var)

            loss = loss_fn(scores, y_var)
            if (t + 1) % PRINT_EVERY == 0:
                norm_loss = calculate_norm_loss(x_var, y_var, scores, loss_fn)
                losses.append(norm_loss)
                print('\ttraining: t = %d, loss = %.4f, norm_loss= %.4f' % (
                      t + 1, loss.data[0], norm_loss))
            if not is_local and t % (len(train_data) // 8) == 0 or overfit_small:
                eval_loss = evaluate(model, val_data, loss_fn)
                eval_losses.append(eval_loss)

            optimizer.zero_grad()
            (loss + oob_loss).backward()
            optimizer.step()

    os.makedirs("losses", exist_ok=True)
    np.save(results_folder + 'losses' + NAME, np.array(losses))
    np.save(results_folder + 'losses/eval_losses' + NAME, np.array(eval_losses))
Example No. 24
def eval(model, dev_data, loss_fn):
    print("Running evaluation...")
    total_loss = 0.0
    model.eval()
    length = len(dev_data)
    for t, (x, y) in enumerate(dev_data):
        x_var = Variable(normalize(x).permute(0,3,1,2)).type(dtype)
        y_var = Variable(normalize(y).permute(0,3,1,2)).type(dtype)
        
        scores = model(x_var)
        if (t == length-1):
            for i in range(NUM_SAVED_SAMPLES):
                name = "./eval/{}_{}_".format(t, i)
                imsave(name + "gen.png", np.transpose(denorm(scores[i].data.cpu().numpy()), axes=[1,2,0]))
                imsave(name + "gold.png", np.transpose(denorm(y_var[i].data.cpu().numpy()), axes=[1,2,0]))
                x = x_var[i].data.cpu().numpy()
                imsave(name + "orig_0.png", x[:3,:,:])
                imsave(name + "orig_1.png", x[3:,:,:])
        
        total_loss += loss_fn(scores, y_var).data[0]

    print("Total eval loss: %.4f, Avg eval loss: %.4f" % (total_loss, total_loss / NUM_VAL))
Example No. 25
    def test_can_normalize_data(self):
        lines = load_test_data('weather.dat')
        normal = normalize(lines)
        first_measurement = normal[0]
        last_measurement = normal[-1]
        self.assertEqual(1, first_measurement.day)
        self.assertEqual(59, first_measurement.min)
        self.assertEqual(88, first_measurement.max)
        self.assertEqual(29, first_measurement.delta())

        self.assertEqual(30, last_measurement.day)
        self.assertEqual(45, last_measurement.min)
        self.assertEqual(90, last_measurement.max)
        self.assertEqual(45, last_measurement.delta())
Example No. 26
    def calculate_score(input_data):
        result = COEFS.copy()

        for i in range(len(result)):
            result[i].append(input_data[i])

        for idx, row in enumerate(COEFS):
            result[idx].append(row[2] * row[-1])

        score = INTERCEPT
        for row in result:
            score += row[-1]

        return normalize(score)
Example No. 27
def team_extractor(local=False):
    soup = get_html_soup(LOCAL_PATH, LINK, local)

    con = soup.find(id='pageContent').find(attrs={'class': 'content'})
    tables = con.find_all('table')

    data = []
    for t in tables:
        data.extend(parse_table(t))

    teams = {}
    for team in data:
        if len(team) > 0:
            teams[normalize(team[-4])] = [team[-1], team[-2], team[-3]]
    return teams
Example No. 28
    def weigh_match(self, pair):
        init_str = pair[1]
        query_str, completion_str = normalize(pair)

        # skip if there are no completions or the query is not made of letters
        if completion_str == 'NULL' or re.fullmatch(r'\W+', query_str):
            return False

        compare_obj = Compare(query_str, completion_str, init_str)
        Compare.calculate_weight(compare_obj)
        query_weight = compare_obj.max_obj.weight

        # keep pairs within the desired Levenshtein distance
        if 0 <= query_weight <= 2:
            self.light_match = compare_obj.max_obj
        else:
            self.light_match = None
Example No. 29
def sentences_to_indices(X, word_to_index, max_len):

    m = X.shape[0]

    X_indices = np.zeros((m, max_len), dtype=int)

    for i in range(m):

        sentence_words = normalize(X[i]).split()

        j = 0
        for w in sentence_words:
            if w in word_to_index:
                X_indices[i, j] = word_to_index[w]
            j = j + 1

    return X_indices
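A toy illustration; the vocabulary is invented, and the exact indices depend on what the project's normalize() returns:

import numpy as np

word_to_index = {'i': 1, 'love': 2, 'you': 3}          # toy vocabulary
X = np.array(["I love you", "love you"])
print(sentences_to_indices(X, word_to_index, max_len=4))
# e.g. [[1 2 3 0]
#       [2 3 0 0]]  assuming normalize() lowercases and strips punctuation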
Example No. 30
def read_url(url):

    global count

    url = n.normalize(url, main_url_domain, main_url_ext)

    #check normalizer.py mailto: condition
    if url is not None:
        url_request = requests.get(url)

        count += 1
        print(count)
        url_domain = s.extract(url)["url_domain"]

        is_ok = True

        if url_request.status_code >= 400:

            broken_links.append(url)
            is_ok = False

            write_broken = url + "," + str(url_request.status_code) + "\n"
            broken_file.write(write_broken)

        print(url_request.status_code)
        soup = BeautifulSoup(url_request.content,
                             "html.parser",
                             from_encoding="iso-8859-1")

        url_list = soup.find_all('a', href=True)
        checked_links.append(url)

        write_checked = str(count) + "," + url + "," + str(
            url_request.status_code) + "," + str(is_ok) + "\n"
        checked_file.write(write_checked)

        if url_domain == main_url_domain:

            for link in url_list:

                if link['href'] not in checked_links:
                    print(link['href'])
                    read_url(link['href'])
Example No. 31
 def run_episode(self,
                 env,
                 normalizer,
                 addOrSubtractOperator,
                 delta=None,
                 render=False):
     """Gets the total reward for an episode"""
     total_reward = 0
     state = env.reset()
     for episode_number in range(self.options['MAX_EPISODES']):
         if render:
             env.render()
         normalizer.observe(state)
         state = normalizer.normalize(state)
         action = self.policy(state, addOrSubtractOperator, delta)
         state, reward, done, info = env.step(action)
         reward = max(min(reward, 1), -1)
         total_reward += reward
         if done:
             break
     env.env.close()
     return total_reward
Example No. 32
def scoreboardextractor(local=False):
    soup = get_html_soup(LOCAL_PATH, LINK, local)

    con = soup.find('table')
    rows = con.find('tbody').find_all('td')

    scoreboard = []
    for row in rows:
        if row.text and '1' not in row.text and len(
                row.text) > 4 and 'tries' not in row.text:
            text = row.text
            for key in REGIONS:
                if text.startswith(key) and not REGIONS[key]:
                    # print(text)
                    text = text[len(key):]
                    REGIONS[key] = True

            scoreboard.append(normalize(text))

    # for i in range(len(scoreboard)):
    #     print(i, scoreboard[i])
    return scoreboard
Example No. 33
def calculate_score(input_data):
    """
    Given the intercept and the model coefficients, this function calculates
    the score of an input.
    """
    result = COEFS.copy()
    # find answers that were true and flag as 1, 0 otherwise
    for idx, row in enumerate(COEFS):
        if row[1] in input_data.keys():
            result[idx].append(1)
        else:
            result[idx].append(0)

    # multiply the flag by the coefficient to get the points
    for idx, row in enumerate(COEFS):
        result[idx].append(row[2] * row[-1])

    # sum all points and the intercept
    score = INTERCEPT
    for row in result:
        score += row[-1]

    return normalize(score)
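A worked sketch of the arithmetic with made-up coefficients; the real COEFS and INTERCEPT are defined elsewhere in the project:

# hypothetical rows of the form [index, answer_key, coefficient]
COEFS = [[0, 'smoker', 1.5], [1, 'exercises', -0.7]]
INTERCEPT = 0.3
input_data = {'smoker': True}
# flags:  'smoker' -> 1, 'exercises' -> 0
# points: 1 * 1.5 = 1.5 and 0 * -0.7 = 0.0
# score = 0.3 + 1.5 + 0.0 = 1.8, which is then passed to normalize()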
Example No. 34
def do_predict_multiple(data):
    output = {}

    count = 0

    for l in xrange(len(data) - 1, MIN_LENGTH, -1):
    # for l in xrange(MIN_LENGTH, len(data)):
        for i in xrange(len(data) - l + 1 - 1, -1, -1):
            if count > max_analysis_count:
                break

            current = normalizer.normalize(np.array(data)[i:i+l])
            result = model.predict(current)[0]
            result = LABEL_LIST[int(result)]

            if result not in output:
                output[result] = 1
            else:
                output[result] += 1

            if result != 'random' and result != 'horizontal' and result != 'vertical':
                count += 1

    return output
Example No. 35
def process_data_row(label, file_name):
    data = np.genfromtxt(file_name, delimiter=', ', dtype = int)
    data = normalizer.normalize(data)

    X.append(data)
    y.append(LABELS[label])
Example No. 36
expected_return = expected_annual_return / (365.0 / days_owned) #Percent
price -= upcoming_dividend

broker_cut = (7.95 + 0.75 * contracts_purchased) / (contracts_purchased * 100)
if contracts_purchased == 10: # A hacky way for representing that "10" contracts purchased is just my way of dividing everything by 10
    broker_cut = 0.087 / 10

beta = 1
end_price_list = np.random.gamma(price, beta, 10000)


# Normalize the list by looping through normalization methods to get (a) desired [expected] standard deviation and (b) desired [expected] average return
total_range = 0
for i in range(1,10000,1):
    step = i / 10.0
    temp_end_price_list = normalize(end_price_list, minimum = 0, total_range = step)
    std_dev_actual = (np.std(map(lambda x: x - price, temp_end_price_list))/price * 100)
    if abs(std_dev_actual - desired_stddev) < 0.1: #.1%
        print "Using total range of %.2f, actual std dev is %.2f, desired std dev is %.2f" % (step, std_dev_actual, desired_stddev)
        end_price_list = temp_end_price_list
        total_range = step
        break 

start_step = 1
if desired_stddev > 16:
    start_step = -10000
if desired_stddev > 12:
    start_step = -6000
elif desired_stddev > 9:
    start_step = -2000
for i in range(start_step,10000,1):
Example No. 37
		pluscomment = pluscomment.replace("※", "")
		pluscomment = pluscomment.replace("∴", "")
		pluscomment = pluscomment.replace("*", "")
		pluscomment = pluscomment.replace("+", "")
		pluscomment = pluscomment.replace("・", "")
		pluscomment = pluscomment.replace("°", "")
		pluscomment = pluscomment.replace("w", "")
		"""
		pluscomment = pluscomment.replace("null", "")
		pluscomment = pluscomment.replace("\n", "")
		pluscomment = pluscomment.replace("\t", "")
		pluscomment = pluscomment.replace(" ", "")
		pluscomment = pluscomment.replace(" ", "")
		pluscomment = pluscomment.replace("ぁ", "あ")
		pluscomment = re.sub(re.compile("[!-/:-@[-`{-~]"), '', pluscomment)
		pluscomment = normalize(pluscomment.decode("utf-8"))
		pluscomment = pluscomment.replace(u" ", "")
		pluscomment = pluscomment.replace(u" ", "")
		# handle shouted comments: collapse repeated long-vowel marks
		pluscomment = pluscomment.replace(u"ーー",u"ー")
		if pluscomment != '':
			pluscomment = tagger.parse(pluscomment.encode("utf-8"))
			#pluscomment = pluscomment.replace("\n"," ")
			pluscomment = pluscomment.replace("  "," ")
			fo.write(pluscomment)
	thread[ID][j]["comment"] = commenttext
	fo.write("\n")
	fo.close()

files = os.listdir('../data/tcserv.nii.ac.jp/access/[email protected]/832c5b059b15f647/nicocomm/data/video')
for nfile in files[1:2]:
Example No. 38
tagger = MeCab.Tagger( '-Owakati -u /usr/local/Cellar/mecab/0.996/lib/mecab/dic/ipadic/ruiter-keyword.dic, /usr/local/Cellar/mecab/0.996/lib/mecab/dic/ipadic/wikipedia-keyword.dic,/usr/local/Cellar/mecab/0.996/lib/mecab/dic/ipadic/hatena-keyword.dic')
#commentfiles = os.listdir('comment')
for j in thread.keys():
	filename = ("businesstexts/" + str(j) + ".txt")
	fo = file(filename,'w')
	#filename = ("comment2_" + ID + "/" + "sm20158." +"txt")
	commenttext = ''
	text = thread[j]['bodyText']
	if ".T)" in text:
		point = text.find(".T)")
		meigararmei =  text[(point-14):(point + 3)]
		text = text.replace(meigararmei,"")
	sentence = text.replace("。"," ")
	if sentence != '':
		sentence = unicodedata.normalize("NFKC", "".join(unicodedata.normalize("NFKC", sentence.decode("utf-8")).split()))
		sentence = normalize(sentence)
		sentence = sentence.lower()
		sentence = re.sub(re.compile("[!-/:-@[-`{-~]"), '', sentence.encode("utf-8"))
		sentence = sentence.replace(" ", "")
		sentence = sentence.replace(" ", "")
		sentence = sentence.replace("、", "")
		sentence = tagger.parse(sentence)
		fo.write(sentence)
	else:
		continue
	fo.write("\n")
	fo.close()


filename = ("allbuisinessnews.txt")
fo = file(filename,'w')
Example No. 39
def do_predict_single(data):
    current = normalizer.normalize(np.array(data))
    result = model.predict(current)[0]
    result = LABEL_LIST[int(result)]
    return { result : 1 }