def crawl(self, current=False):
    """The main function - crawl the league and mine some data."""
    logging.info('Starting crawl')
    self.driver.get(self.league_link)
    self.team_names = set([unidecode(thr.text) for thr in
                           self.driver.find_element_by_class_name("stat-table")
                               .find_elements_by_class_name("team-link")])
    self.driver.find_element(By.XPATH, '//*[@id="sub-navigation"]') \
        .find_element(By.PARTIAL_LINK_TEXT, 'Fixtures').click()
    self.played_months = self.get_played_months()
    self.load_previous_data(current)
    months_to_crawl = self.played_months[self.played_months.index(self.start_month)::-1]
    prog_bar = ChargingBar('Progress of %s crawling:' % ' '.join([self.league, str(self.year)]),
                           max=sum([len(self.fixtures[month]) for month in months_to_crawl]))
    for month in months_to_crawl:
        for game in self.fixtures[month]:
            logging.info('Starting to parse game')
            if game:
                self.parse_game(game)
                prog_bar.next()
                logging.info('Finished game, moving to the next one')
            else:
                logging.info('Finished month, saving to disk')
                self.save_month(month)
    if current:
        pass
        # DBHandler(args_parser.LEAGUE_NAME).update_db(self.all_teams_dict, str(self.year))
    else:
        # we're done - we can save to the DB now
        DBHandler(args_parser.LEAGUE_NAME).insert_to_db(self.all_teams_dict, str(self.year))
    self.driver.quit()
    prog_bar.finish()
def experiment_with_polyfit(self, collection, coeff, x_dim=1800, y_dim=1200):
    bar = ChargingBar('Resizing and analysing images', max=len(collection))
    ip = self.improc
    txt_file_cooccurrence = open(self.txt_cooccurrence, 'w')
    txt_file_moments = open(self.txt_moments, 'w')
    for o in collection:
        if o.ACrate >= 0:
            aspect_roi = np.polyval(coeff, o.ACrate)
            resized_img = ip.img_resizeProfileROIaspect(o.window, aspect_roi,
                                                        x_dim=x_dim, y_dim_=y_dim)
            res = ip.img_detectROI(resized_img, thr_factor=0.5, zero_padding=False)
            if res == -1:
                print "ERROR!!"
                continue
            ROI = res[0]
            self.writeROImoments(ROI, o, txt_file_moments)
            self.writeROIcooccurrence(ROI, o, txt_file_cooccurrence)
        bar.next()
    txt_file_cooccurrence.close()
    txt_file_moments.close()
    bar.finish()
def decrypt_file(self, ciphertext_file_name, plaintext_file_name):
    ct_file = open(ciphertext_file_name, "rb")
    pt_file = open(plaintext_file_name, 'wb+')
    file_size = os.stat(ciphertext_file_name).st_size
    count_of_primes = len(self.primes)
    bar = ChargingBar('[*] Decrypting ', max=(file_size // self.chunk_size) - 1)
    # Read ciphertext chunks
    cts = []
    for i, (chunk, chunk_size) in enumerate(
            self.decryption_chunk_reader(ct_file, PRIME_SIZE, self.chunk_size)):
        chunk = ((chunk - self.beta)
                 * modinv(self.alpha, self.primes[i % count_of_primes])) % self.primes[i % count_of_primes]
        cts.append(chunk)
        if len(cts) < count_of_primes:
            continue
        bar.next()
        ct = solve_crt(cts, self.primes)
        pt_file.write(ct.to_bytes(chunk_size, byteorder='little'))
        cts = []
    bar.finish()
    l("Decryption done")
    l("Saved at %s" % os.path.abspath(pt_file.name))
def train(self, inputExamples, expected, iterations=BARSIZE):
    if self.initialized:
        currentIteration = 1
        barIteration = 0
        statusBar = ChargingBar("\x1b[4;36m" + "\t>> Training:", max=BARSIZE)
        interval = int(iterations / BARSIZE) if iterations > 100 else 1
        errorMedia = 0
        statusBar.start()
        while currentIteration <= iterations:
            errorMedia = 0
            prediction = self.__backPropagation(inputExamples, expected)
            errorMedia = self.cost_fun(prediction, expected)
            currentIteration += 1
            if barIteration % interval == 0:
                statusBar.next()
                barIteration = 0
            barIteration += 1
        while currentIteration < BARSIZE:
            currentIteration += 1
            statusBar.next()
        statusBar.finish()
        self.trainingStatus = str(round(errorMedia, 4)) + "TE"
        print("\x1b[1;33m" + "\t>> Error (Media) Cost: ", round(errorMedia, 4))
        print("\x1b[0;37m" + "=-" * 35 + "=")
    else:
        print("<Error>: Empty Neural Network, use reset() or loadFromFile(file)")
        exit(1)
def gibbs_sampling(self, predict=dict(), given=dict(), n=10000, skip=50):
    bar = Bar('Sampling', max=n)
    nodes = list(self.node.keys())
    sample = self.random_sample(preset=given)
    count = 0
    sum = 0
    for i in range(n):
        last = None
        bar.next()
        node = None
        last = node
        while node is None or node in given.keys() or node == last:
            node = nodes[randint(0, len(nodes) - 1)]
        parents = self.node[node]['parents']
        if parents[0] is None:
            sample[node] = self.sample(node)
        else:
            given = {key: sample[key] for key in parents}
            sample[node] = self.sample(node, given=given)
        if count == skip:
            evidence = {key: sample[key] for key in predict.keys()}
            if not predict == evidence:
                continue
            sum += 1
        else:
            count += 1
    bar.finish()
    return sum / (n - count)
def calculate_emoji_sentiments(data):
    regex = re.compile(r'\d+(.*?)[\u263a-\U0001f645]')
    emoji_sentiments = {}
    bar = ChargingBar('Calculating Emoji Map\t\t\t', max=len(data))
    for instance in data:
        emojis = extract_emojis(instance['tweet'])
        for emoji in emojis:
            if emoji not in emoji_sentiments:
                emoji_sentiments[emoji] = {}
                emoji_sentiments[emoji]['positive'] = 0
                emoji_sentiments[emoji]['neutral'] = 0
                emoji_sentiments[emoji]['negative'] = 0
            if instance['sentiment'] == 'positive':
                emoji_sentiments[emoji]['positive'] += 1
            elif instance['sentiment'] == 'neutral':
                emoji_sentiments[emoji]['neutral'] += 1
            elif instance['sentiment'] == 'negative':
                emoji_sentiments[emoji]['negative'] += 1
        bar.next()
    bar.finish()
    return emoji_sentiments
def get_all_people_connected(token, args):
    all_user = []
    page = 0
    if args.campus == "Paris":
        id_campus = 1
    else:
        id_campus = get_id_campus(args)
    print("Get all people currently connected in {}".format(args.campus))
    clear()
    bar = None
    while True:
        args = [
            'access_token=%s' % (token['access_token']),
            'token_type=bearer',
            'page[size]=100',
            'page[number]={}'.format(str(page)),
            'filter[active]=true',
        ]
        status = requests.get("https://api.intra.42.fr/v2/campus/" + str(id_campus)
                              + "/locations?%s" % ("&".join(args)))
        if not status.status_code == 200:
            print("Error during people connected search.")
            sys.exit()
        if not bar:
            bar = ChargingBar('Call API 42',
                              max=(int(status.headers['X-Total']) // 100) + 2)
        response = status.json()
        if not response:
            break
        for poste in response:
            all_user.append([poste['user']['login'], poste['host']])
        page += 1
        bar.next()
        time.sleep(1)
    bar.finish()
    clear()
    return all_user
def get_features_dic(dataset_dic, config, segment_dur, segment_ol):
    features_dic = {}
    total = sum([len(v) for k, v in dataset_dic.items()])
    bar = ChargingBar("Extracting RQA Measures for {} "
                      "utterances...".format(total), max=total)
    for spkr in dataset_dic:
        features_dic[spkr] = {}
        for id, raw_dic in dataset_dic[spkr].items():
            features_dic[spkr][id] = {}
            fs = raw_dic['Fs']
            signal = raw_dic['wav']
            segment_features_2D = extract_per_segment(fs, config, signal,
                                                      segment_dur, segment_ol)
            features_dic[spkr][id]['x'] = segment_features_2D
            features_dic[spkr][id]['y'] = raw_dic['emotion']
            bar.next()
    bar.finish()
    return features_dic, fs
def decrypt_file(self, ciphertext_file_name, plaintext_file_name):
    ct_chunk_reader = self.decryption_chunk_reader(ciphertext_file_name,
                                                   self.padding_block_size_bytes,
                                                   self.base_num)
    pt_file = open(plaintext_file_name, 'wb+')
    file_size = os.stat(ciphertext_file_name).st_size
    bar = ChargingBar('[*] Decrypting ',
                      max=(file_size // (self.max_block_size_after_encrypt_bytes * 1000)) + 1)
    for index, (pt_chunk, block_size) in enumerate(ct_chunk_reader):
        if index % 1000 == 0:
            bar.next()
        # Map every digit back through the decryption lookup table
        nums_after_lookup_table = ''
        for num in pt_chunk:
            nums_after_lookup_table = nums_after_lookup_table + self.look_up_table_decryption.get(num)
        nums_after_lookup_table = int(nums_after_lookup_table, self.base_num)
        ct_bytes = nums_after_lookup_table.to_bytes(block_size, byteorder='little')
        pt_file.write(ct_bytes)
    bar.next(bar.max - bar.index)
    bar.finish()
    l("Decryption done")
    l("Saved at %s" % os.path.abspath(pt_file.name))
def read_and_upload(creds_path, source, folder_name=None):
    d = Drive(creds_path=creds_path)
    d.authenticate()
    if type(source) == str:
        source = Path(source)
    files = list_files(source)
    source_folder = source.parts[-1]

    # Select folder section
    if folder_name:
        folder = d.check_folder_if_exist(folder_name)
        if folder:
            folder_id = d.create_folder(source_folder, folder).get('id')
            d.select_folder([folder_id])
        else:
            new_folder = d.create_folder(folder_name).get('id')
            folder_id = d.create_folder(source_folder, [new_folder]).get('id')
            d.select_folder([folder_id])
    else:
        folder_id = d.create_folder(source_folder).get('id')
        d.select_folder([folder_id])

    # Upload section
    bar = ChargingBar('Uploading', max=len(files))
    for f in files:
        file_name = f.parts[-1]
        upload = d.upload_file(f, file_name)
        bar.next()
    bar.finish()
def encrypt_file(self, plaintext_file_name, ciphertext_file_name):
    pt_file = self.encryption_chunk_reader(plaintext_file_name, self.block_size_bytes)
    ct_file = open(ciphertext_file_name, 'wb+')
    file_size = os.stat(plaintext_file_name).st_size
    bar = ChargingBar('[*] Encrypting ',
                      max=(file_size // (self.block_size_bytes * 100)) + 1)
    for index, pt_chunk in enumerate(pt_file):
        if index % 100 == 0:
            bar.next()
        plain_num_base_10 = int.from_bytes(pt_chunk, byteorder='little')
        # Map every base-N digit through the encryption lookup table
        nums_after_lookup_table = ''
        for num in int2base(plain_num_base_10, self.base_num):
            nums_after_lookup_table = nums_after_lookup_table + self.look_up_table_encryption.get(num)
        leading_zero_bytes = re.search('(?!0)', nums_after_lookup_table).start()
        nums_after_lookup_table = int(nums_after_lookup_table, self.base_num)
        padding_bits = self.max_block_size_after_encrypt_bits - len(
            bin(nums_after_lookup_table)[2:]) - leading_zero_bytes
        padding_bytes = padding_bits.to_bytes(self.padding_block_size_bytes, byteorder='little')
        ct_bytes = nums_after_lookup_table.to_bytes(self.max_block_size_after_encrypt_bytes,
                                                    byteorder='little')
        ct_file.write(padding_bytes)
        ct_file.write(ct_bytes)
    bar.finish()
    l("Encryption done")
    l("Saved at %s" % os.path.abspath(ct_file.name))
def get_max_size_photos(self, user_id=None, count=5):
    if user_id is None:
        user_id = self.owner_id
    all_photos = self.get_all_photo(user_id, count)
    json_file = []
    photo_list = []
    print('\nFetching the maximum-size photos from VK')
    bar = ChargingBar('Countdown', max=len(all_photos['response']['items']))
    for photo in all_photos['response']['items']:
        photo_name = str(photo['likes']['count']) + '.jpg'
        photo_size = photo['sizes'][len(photo['sizes']) - 1]['type']
        json_file.append({'file_name': photo_name, 'size': photo_size})
        photo_list.append({
            'file_name': photo_name,
            'url': photo['sizes'][len(photo['sizes']) - 1]['url']
        })
        bar.next()
        time.sleep(0.1)
    bar.finish()
    return [json_file, photo_list]
def PREPARE_DATASET(trainingArticleCount, preprocessor: Preprocessor, maxArticles,
                    allowedCategories=[], dtype='reuters') -> List[DataSet]:
    # Create an array with two datasets: one for training, one for testing
    dataSet = [DataSet(), DataSet()]

    # Check the data type and initialize the provider
    if dtype == 'reuters':
        soupLoader = SoupLoader(-1)
        provider = ReutersProvider(soupLoader)
    else:
        provider = TwentyNewsProvider('../TwentyNews/')

    # Start a nice percentage bar. Good to have visuals ;)
    bar = ChargingBar("Preparing dataset: ", max=maxArticles)
    while bar.index <= maxArticles:
        try:
            # Try to create a new article
            article = ArticleFactory.GET_NEXT_ARTICLE(provider, allowedCategories)
            article.process(preprocessor)
            # Append the article to the dataset
            dataSet[int(bar.index / trainingArticleCount)].append(article)
            bar.next()
        except OutOfArticlesError:
            # The bar would stop at 99% if not advanced once more
            bar.next()
            break
    bar.finish()
    return dataSet
def asphericity(self, freq=1):
    """Compute the asphericity of the Voronoi cell."""
    # progress bar
    frames = int(self.traj.n_frames / freq)
    bar = ChargingBar('Processing', max=frames, suffix='%(percent).1f%% - %(eta)ds')
    for i in range(0, self.traj.n_frames, freq):
        for j in range(self.traj.n_atoms):
            if self.traj.atom_names[i][j] == self.center:
                # center coordinate
                c = self.traj.coords[i][j]
                # coordinates
                cs = self.traj.coords[i]
                # box size
                L = self.traj.box_size[i]
                # new coordinates after wrapping
                nc = self.wrap_box(c, cs, L)
                points = self.polyhedron(nc, j, L)
                e = self.compute_vc(points)
                self.raw.append(e)
        bar.next()
    bar.finish()
def get_user_who_make_the_project(id, token, argument):
    page = 0
    all_user = get_buffer_file(argument)
    if not all_user or argument.update:
        print("Get all users who registered for the project {} "
              "(can take some time the first time...)".format(argument.name_project))
        bar = None
        while True:
            args = [
                'access_token=%s' % (token['access_token']),
                'token_type=bearer',
                'page[size]=100',
                'page[number]={}'.format(str(page)),
            ]
            status = requests.get("https://api.intra.42.fr/v2/projects/" + str(id)
                                  + "/projects_users?%s" % ("&".join(args)))
            if not status.status_code == 200:
                print("Error during projects users search.")
                sys.exit()
            if not bar:
                bar = ChargingBar('Call API 42',
                                  max=(int(status.headers['X-Total']) // 100) + 2)
            response = status.json()
            if not response:
                break
            for projet in response:
                for team in projet['teams']:
                    for user in team['users']:
                        all_user[user['login']] = [projet['status'],
                                                   projet['validated?'],
                                                   projet['final_mark']]
            page += 1
            bar.next()
            time.sleep(1)
        bar.finish()
        clear()
        clear()
        create_buffer_file(argument, all_user)
    return all_user
def eval(dataset_gen, model_path, n_sources, n_batches, n_jobs):
    model_name = os.path.basename(model_path)
    eval_dic = {'sdr': [], 'sir': [], 'sar': []}
    model, optimizer, _, _, args, mean_tr, std_tr, training_labels = \
        model_logger.load_and_create_the_model(model_path)
    k_means_obj = KMeans(n_clusters=n_sources, n_jobs=n_jobs)
    model.eval()
    with torch.no_grad():
        bar = ChargingBar("Evaluating model {} ...".format(model_name), max=n_batches)
        for batch_data in dataset_gen:
            abs_tfs, wavs_lists, real_tfs, imag_tfs = batch_data
            if torch.cuda.is_available():
                input_tfs = abs_tfs.cuda()
            else:
                input_tfs = abs_tfs
            # the input sequence is determined by time and not freqs
            # before: input_tfs = batch_size x (n_fft/2+1) x n_timesteps
            input_tfs = input_tfs.permute(0, 2, 1).contiguous()
            # normalize with mean and variance from the training dataset
            input_tfs -= mean_tr
            input_tfs /= std_tr
            vs = model(input_tfs)
            for b in np.arange(vs.size(0)):
                embedding_features = vs[b, :, :].data.cpu().numpy()
                z_embds = (embedding_features - np.mean(embedding_features, axis=0)) / (
                    np.std(embedding_features, axis=0) + 10e-8)
                embedding_labels = np.array(k_means_obj.fit_predict(z_embds))
                sdr, sir, sar = np_eval.naive_cpu_bss_eval(
                    embedding_labels,
                    real_tfs[b].data.numpy(),
                    imag_tfs[b].data.numpy(),
                    wavs_lists[b].data.numpy(),
                    n_sources,
                    batch_index=b)
                eval_dic['sdr'].append(sdr)
                eval_dic['sir'].append(sir)
                eval_dic['sar'].append(sar)
            bar.next()
        bar.finish()

    # return arrays so that both mean and std values can be computed
    result_dic = {}
    for k, v in eval_dic.items():
        result_dic[k] = np.array(v)
    return result_dic
def main(args):
    if len(args) != 2:
        print('Usage: {0} cvs_dir_path'.format(args[0]))
        sys.exit(1)
    cvs_dir_path = args[1]
    csv_list = glob.glob(cvs_dir_path + "/*.csv")
    csv_list = list(
        filter(
            lambda x: "_sum" not in x and "bucket" not in x and "count" not in x,
            csv_list))
    bar = ChargingBar('Processing files', max=len(csv_list),
                      suffix='%(percent).1f%% %(elapsed_td)s')
    timeseries_variances = {}
    for csv_file_name in csv_list:
        timeseries = group_time_series(csv_file_name)
        calc_timeseries_variances(timeseries_variances, timeseries)
        bar.next()
    bar.finish()
    print("\nMetrics sorted by variance: ")
    sorted_dic = OrderedDict(
        sorted(timeseries_variances.items(), key=itemgetter(1), reverse=True))
    for item in sorted_dic.items():
        print(item)
def train(self, autostop):
    m = len(self.km)
    bar = ChargingBar('Training', max=self.range, suffix='%(percent)d%%')
    for i in range(self.range):
        tmp0 = self.rate * (1 / m) * sum([
            self.predict(self.th[0], self.th[1], self.km[i]) - self.price[i]
            for i in range(m)
        ])
        tmp1 = self.rate * (1 / m) * sum([
            (self.predict(self.th[0], self.th[1], self.km[i]) - self.price[i]) * self.km[i]
            for i in range(m)
        ])
        self.th[0] -= tmp0
        self.th[1] -= tmp1
        self.tmp_history[0].append(tmp0)
        self.tmp_history[1].append(tmp1)
        self.th_history[0].append(self.th[0])
        self.th_history[1].append(self.th[1])
        bar.next()
        mse = sum([(self.predict(self.th[0], self.th[1], self.km[i]) - self.price[i]) ** 2
                   for i in range(m)])
        if autostop and i != 0:
            percent = self.percent_diff(mse, self.mse_hist[-1])
            if percent < 1e-50:
                break
        self.mse_hist.append(mse)
    bar.finish()
    self.th_dest[0] = self.th[0]
    self.th_dest[1] = self.th[1]
    self.destandardize_theta(self.km_ref, self.price_ref, self.th_dest)
    write_theta(self.th_dest[0], self.th_dest[1])
def UpdateRoutine():
    addrinfo = socket.getaddrinfo(MYGROUP_4, None)[0]
    Socket = socket.socket(addrinfo[0], socket.SOCK_DGRAM)

    # Set Time-to-live (optional)
    ttl_bin = struct.pack('@i', MYTTL)
    Socket.setsockopt(socket.IPPROTO_IP, socket.IP_MULTICAST_TTL, ttl_bin)

    SecureFirmwareName = 'SecureFirmware.bin'
    SecureFirmwareLen = GetFirmwareSize(SecureFirmwareName)
    print("Firmware file has been loaded.")

    ChunkSize = 1012
    ChunkCount = ceil(SecureFirmwareLen / ChunkSize)
    SecureFirmwareFile = open(SecureFirmwareName, "rb")
    print("Firmware file size is %s bytes" % SecureFirmwareLen)

    datafile = SecureFirmwareFile.read(ChunkSize)
    Progress = ChargingBar('Updating', max=ChunkCount,
                           suffix='%(index)d/%(max)d [%(percent)d%%]')
    while datafile:
        Socket.sendto(datafile, (addrinfo[4][0], MYPORT))
        datafile = SecureFirmwareFile.read(ChunkSize)
        Progress.next()
        time.sleep(0.2)
def upload_to_ya(self, upload_list):
    ya_obj = YaUpLoader(self.ya_token)
    ya_load_to = input('\nEnter the path to the folder on ya_disk: ')
    print(ya_obj.check_folder(ya_load_to))
    print('\nUploading files to YaDisk')
    bar = ChargingBar('Countdown', max=len(upload_list[1]))
    hash_map = {}
    for photo in upload_list[1]:
        bar.start()
        file_name = photo['file_name']
        if file_name in hash_map.keys():
            # Duplicate name: append a counter so files are not overwritten
            last_name = file_name
            value = hash_map[last_name] + 1
            file_name = file_name.split('.')[0] + '_' + str(value) + '.jpg'
            hash_map[last_name] = value
        else:
            hash_map[file_name] = 1
        ya_file_to = ya_load_to + '/' + file_name
        url = photo['url']
        res = requests.get(url).content
        ya_obj.upload(ya_file_to, res)
        bar.next()
    bar.finish()
def unfollowNon():
    creds = credentials.credentials(2)
    consumer_key = creds[0]
    consumer_secret = creds[1]
    access_token = creds[2]
    access_token_secret = creds[3]
    auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_token, access_token_secret)
    api = tweepy.API(auth)
    arrFriends = api.friends_ids(screen_name='HayHilos')
    arrFollowers = api.followers_ids(screen_name='HayHilos')
    print("Unfollow process started...\n")
    contador = 0
    arrFr = len(arrFriends)
    arrFo = len(arrFollowers)
    diferencia = int(arrFr) - int(arrFo)
    bar1 = ChargingBar('Processing:', max=diferencia)
    try:
        for follower in arrFriends:
            if follower not in arrFollowers:
                api.destroy_friendship(follower)
                time.sleep(5)
                contador += 1
                bar1.next()
                if contador >= 50:
                    contador = 0
                    time.sleep(180)
        bar1.finish()
        print('\nProcess completed successfully.')
    except tweepy.RateLimitError:
        print('\nAn error occurred.')
        time.sleep(15 * 60)
        print('\n')
async def main():
    """
    Collect character images from Marvel.
    Do not store those which do not have any available image.
    """
    images_dir = "./superpower-static/images"
    if not os.path.exists(images_dir):
        os.makedirs(images_dir)
    with open("./superpower/characters.json", "r") as f:
        characters = json.load(f)
    bar = ChargingBar('Fetching ', max=len(characters))
    for c in characters:
        bar.next()
        async with aiohttp.ClientSession() as s:
            thumbnail = c["thumbnail"]
            if thumbnail["path"] == THUMBNAIL_NOT_FOUND:
                continue
            img = "{}/{}.{}".format(thumbnail["path"], "portrait_uncanny",
                                    thumbnail["extension"])
            async with s.get(img) as r:
                img_path = os.path.join(
                    images_dir,
                    "{}.{}".format(slugify.slugify(c["name"]), thumbnail["extension"]))
                with open(img_path, "wb") as i:
                    i.write(await r.read())
    bar.finish()
def solve_le(v, M):
    rows, cols, ok = check_matrix(M)
    assert ok
    assert rows == cols
    Msh = transpose(M)
    rows_locked = []  # Row indices to lock
    bar = ChargingBar('Processing', max=rows)
    while True:
        where_unity = find_unity_columns(Msh)
        active_unity = sorted(where_unity)
        uniq_cols = filter_uniq_unity_columns(active_unity, Msh)
        # Row indices locking the unit columns that have already been formed
        old_len = len(rows_locked)
        rows_locked = uniq_cols.keys()
        new_len = len(rows_locked)
        for i in range(new_len - old_len):
            bar.next()
        if new_len == rows:
            # All `rows` columns are unit columns and all distinct
            break
        # Always exactly once, i.e. two rows of Msh
        Msh, row, _ = shuffle_matrix(Msh, 1, False, rows_locked)
        v[row[0][1]] = (v[row[0][0]] ^ v[row[0][1]])
        Msh = transpose(Msh)
        where_unity = find_unity_columns(Msh)
        uniq_cols = filter_uniq_unity_columns(active_unity, Msh)
    w = [0] * rows
    for key, val in uniq_cols.items():
        w[key] = v[val]
    bar.finish()
    return w
def encrypt_file(self, plaintext_file_name, ciphertext_file_name):
    pt_file = open(plaintext_file_name, "rb")
    ct_file = open(ciphertext_file_name, 'wb+')
    file_size = os.stat(plaintext_file_name).st_size
    bar = ChargingBar('[*] Encrypting ', max=(file_size // self.chunk_size) - 1)
    last_block_size = file_size % self.chunk_size
    ct_file.write(last_block_size.to_bytes(10, byteorder='little'))
    for chunk in self.encryption_chunk_reader(pt_file, self.chunk_size):
        cts = []
        for p in self.primes:
            small_p = (chunk % p)
            small_cipher = (self.alpha * small_p + self.beta) % p
            cts.append(small_cipher)
        for i, ct in enumerate(cts):
            x = ct.to_bytes(PRIME_SIZE, byteorder='little')
            ct_file.write(x)
        bar.next()
    bar.finish()
    l("Encryption done")
    l("Saved at %s" % os.path.abspath(ct_file.name))
def lemmatize(data):
    output = []
    lemmatizerEn = Lemmatizer(LEMMA_INDEX, LEMMA_EXC, LEMMA_RULES)
    lemmatizerEs = Lemmatizer(LEMMA_INDEX, LEMMA_EXC, LEMMA_RULES,
                              lookup=spacy.lang.es.LOOKUP)
    bar = ChargingBar('Lemmatizing\t\t\t\t', max=len(data))
    for instance in data:
        new_tweet = {}
        new_tweet['tweetid'] = instance['tweetid']
        new_tweet['tweet'] = instance['tweet']
        new_tweet['tokens'] = []
        new_tweet['langid'] = instance['langid']
        new_tweet['sentiment'] = instance['sentiment']
        for i, word in enumerate(instance['tokens']):
            if instance['langid'][i] == 'lang1':
                new_tweet['tokens'].append(lemmatizerEn.lookup(word))
            elif instance['langid'][i] == 'lang2':
                new_tweet['tokens'].append(lemmatizerEs.lookup(word))
            else:
                new_tweet['tokens'].append(word)
                # new_tweet['tokens'].append(lemmatizerEn.lookup(word))
        output.append(new_tweet)
        new_tweet = {}
        new_tweet['tweetid'] = instance['tweetid']
        new_tweet['tweet'] = instance['tweet']
        new_tweet['tokens'] = []
        new_tweet['langid'] = []
        new_tweet['sentiment'] = instance['sentiment']
        bar.next()
    bar.finish()
    return output
def IniciaFlujoSecretos(Secretos):
    lista = []
    lista = ls(lista, Secretos.rutaRepositorio)
    barra = ChargingBar(Fore.GREEN + 'Scanning: ', max=len(lista))
    resultado = []
    for file in lista:
        contenido = readFile(file)
        Cambios = False
        Mensaje = ''
        for secret in Secretos.listaSecretos:
            if contenido.find(secret['Valor']) != -1:
                val = secret['Valor']
                Mensaje += f'\t{file} - Secret found: {val}\n'
                contenido = contenido.replace(secret['Valor'], secret['Llave'])
                Cambios = True
        if Cambios:
            writeFile(file, contenido)
            Mensaje += f'\tSecrets saved to {file}'
        if Mensaje != '':
            resultado.append(Mensaje)
            Mensaje = ''
        barra.next()
    barra.finish()
    for msg in resultado:
        print(Fore.YELLOW + msg)
def remove_stop_words(data):
    spacy_enstopwords = spacy.lang.en.stop_words.STOP_WORDS
    spacy_esstopwords = spacy.lang.es.stop_words.STOP_WORDS
    output = []
    bar = ChargingBar('Removing Stop Words\t\t\t', max=len(data))
    for instance in data:
        new_tweet = {}
        new_tweet['tweetid'] = instance['tweetid']
        new_tweet['tweet'] = instance['tweet']
        new_tweet['tokens'] = []
        new_tweet['langid'] = []
        new_tweet['sentiment'] = instance['sentiment']
        for i, word in enumerate(instance['tokens']):
            if instance['langid'][i] == 'lang1':
                if word not in spacy_enstopwords:
                    new_tweet['tokens'].append(word)
                    new_tweet['langid'].append(instance['langid'][i])
            elif instance['langid'][i] == 'lang2':
                if word not in spacy_esstopwords:
                    new_tweet['tokens'].append(word)
                    new_tweet['langid'].append(instance['langid'][i])
            else:
                new_tweet['tokens'].append(word)
                new_tweet['langid'].append(instance['langid'][i])
        output.append(new_tweet)
        new_tweet = {}
        new_tweet['tweetid'] = instance['tweetid']
        new_tweet['tweet'] = instance['tweet']
        new_tweet['tokens'] = []
        new_tweet['langid'] = []
        new_tweet['sentiment'] = instance['sentiment']
        bar.next()
    bar.finish()
    return output
def experiment(self, collection, x_dim, y_dim):
    txt_file_cooccurrence = open(self.txt_cooccurrence, 'w')
    txt_file_moments = open(self.txt_moments, 'w')
    bar = ChargingBar('Resizing and analysing images', max=len(collection))
    for ii, o in enumerate(collection):
        # print "Resizing images, analysing and writing on disk: %d of %d" % (o.id, len(collection))
        if o.ACrate >= 0:
            # original image
            image = o.window
            # interpolated image
            interp_image = self.improc.img_interpolateImage(image, x_dim, y_dim)
            # detecting blob
            ROI, row_interval, col_interval = self.improc.img_detectROI(
                interp_image, thr_factor=0.5, zero_padding=False)
            self.writeROImoments(ROI, o, txt_file_moments)
            self.writeROIcooccurrence(ROI, o, txt_file_cooccurrence)
            # print time.time()-t0
        bar.next()
    txt_file_cooccurrence.close()
    txt_file_moments.close()
    bar.finish()
    print "Experiment successfully completed!"
    print
def gibbs_sampling(self, bn, predict=dict(), given=dict(), n=10000, skip=50):
    bar = Bar('Sampling', max=n)
    nodes = list(self.node.keys())
    sample = self.random_sample(bn, preset=given)
    count = 0
    sum = 0
    for i in range(n):
        last = None
        bar.next()
        node = None
        last = node
        while node is None or node in given.keys() or node == last:
            node = nodes[randint(0, len(nodes) - 1)]
        parents = self.node[node]['parents']
        if parents[0] is None:
            sample[node] = self.sample(node)
        else:
            given = {key: sample[key] for key in parents}
            sample[node] = self.sample(node, given=given)
        if count == skip:
            evidence = {key: sample[key] for key in predict.keys()}
            if not predict == evidence:
                continue
            sum += 1
        else:
            count += 1
    bar.finish()
    return sum / (n - count)
def sci_online():
    import os
    import requests
    check_directory()
    from os import path
    wd = os.getcwd()
    print('Spinal Cord Injury Online')

    # Download the file
    from urllib.request import urlopen
    from zipfile import ZipFile
    from progress.bar import ChargingBar
    bar = ChargingBar('Downloading subject data', max=20)
    sessions = [1, 2]
    session_type = ['Train', 'Test']
    for session in sessions:
        for trial_type in session_type:
            # Create URL name
            sci_online_url = 'http://bnci-horizon-2020.eu/database/data-sets/001-2019/P09%%20Online%%20Session%%20%d%%20%s.zip' % (
                session, trial_type)
            print('Downloading subject data from ' + sci_online_url)
            sci_online_dwl = urlopen(sci_online_url)  # Download zip file
            print('Saving data into the tmp folder.')
            tempzip = open('tmp\\session%d_%s.zip' % (session, trial_type), "wb")
            tempzip.write(sci_online_dwl.read())
            tempzip.close()
            print('Extracting files into data folder.')
            zf = ZipFile('tmp\\session%d_%s.zip' % (session, trial_type))
            zf.extractall(path=wd + '\\data')
            zf.close()
            bar.next()
    bar.finish()
def get_word_embeddings(data):
    bc = BertClient()
    # bc.encode(['First do it', 'then do it right', 'then do it better'])
    embeddings = []
    sentiment_embeddings = []
    bar = ChargingBar('Calculating tweet embeddings\t\t\t', max=len(data))
    for instance in data:
        # should encode the join of the tokens array instead;
        # kind of a hacky fix for an empty tokens array
        if len(instance['tokens']) == 0:
            embedding = bc.encode([instance['tweet']])
        else:
            embedding = bc.encode([' '.join(instance['tokens'])])
        embeddings.append(embedding)
        sentiment_embeddings.append({
            "embedding": embedding[0],
            "sentiment": instance['sentiment']
        })
        bar.next()
    bar.finish()
    # print(embeddings)
    # print(len(embeddings), len(embeddings[0]), len(embeddings[0][0]))
    return embeddings, sentiment_embeddings
def rejection_sample(self, predict=dict(), given=dict(), n=10000):
    sum = 0
    bar = Bar('Sampling', max=n)
    for i in range(n):
        bar.next()
        sample = self.compute_sample()
        evidence = {key: sample[key] for key in given.keys()}
        if not given == evidence:
            continue
        evidence = {key: sample[key] for key in predict.keys()}
        if not predict == evidence:
            continue
        sum += 1
    bar.finish()
    return sum / n
def likelihood_weighting(self, predict=dict(), given=dict(), n=10000):
    num = den = 0
    bar = Bar('Sampling', max=n)
    for i in range(n):
        bar.next()
        sample = self.compute_sample(preset=predict)
        for node in predict.keys():
            parents = self.node[node]['parents']
            given_pa = {key: sample[key] for key in parents}
            weight = float(self.get_probability(node, evidence=given_pa,
                                                value=predict[node]))
        evidence = {key: sample[key] for key in given.keys()}
        if given == evidence:
            num += weight
        den += weight
    bar.finish()
    return num / den
def create_examples(self, year, lookback=15, current=False):
    """
    This function creates all the examples for self.league, year.
    The examples are created using the given lookback.
    """
    def update_all_teams_dict(res, all_teams_dict, team, first):
        for fix in sorted(res):
            if fix == 1 and res[fix] == {}:
                all_teams_dict[team][fix] = []
                continue
            if first:
                all_teams_dict[team][fix] = [res[fix][k] for k in sorted(res[fix])]
            else:
                all_teams_dict[team][fix] += [res[fix][k] for k in sorted(res[fix])]

    def relative_features(arr1, arr2, fn):
        combined_list_all_1 = [value for (value, key) in zip(arr1, fn) if len(key.split("all_pos")) > 1]
        combined_list_att_1 = [value for (value, key) in zip(arr1, fn) if len(key.split("att_pos")) > 1]
        combined_list_def_1 = [value for (value, key) in zip(arr1, fn) if len(key.split("def_pos")) > 1]
        combined_list_all_2 = [value for (value, key) in zip(arr2, fn) if len(key.split("all_pos")) > 1]
        combined_list_att_2 = [value for (value, key) in zip(arr2, fn) if len(key.split("att_pos")) > 1]
        combined_list_def_2 = [value for (value, key) in zip(arr2, fn) if len(key.split("def_pos")) > 1]
        all_rel = [1 for (val1, val2) in zip(combined_list_all_1, combined_list_all_2) if val1 > val2]
        att_rel = [1 for (val1, val2) in zip(combined_list_att_1, combined_list_att_2) if val1 > val2]
        def_rel = [1 for (val1, val2) in zip(combined_list_def_1, combined_list_def_2) if val1 > val2]
        return (float(len(all_rel)) / len(combined_list_all_1),
                float(len(att_rel)) / len(combined_list_att_1),
                float(len(def_rel)) / len(combined_list_def_1))

    from features.features import Features
    temp_DB = self.temp_DB
    all_teams_names = [g['_id'] for g in temp_DB[self.league].aggregate(
        [{"$match": {"Year": int(year)}}, {"$group": {"_id": "$GName"}}])]
    all_teams_dict = {name: {} for name in all_teams_names}
    features = Features(temp_DB[self.league], year, self.league)
    features_names = []
    prog_bar = ChargingBar('Creating examples for %s-%s' % (self.league, year),
                           max=len(all_teams_dict))
    for team in all_teams_dict:
        res_by_all, res_by_non_avg = features.create_features(team, lookback)
        if not features_names:
            features_names = features.features_names
        update_all_teams_dict(res_by_all, all_teams_dict, team, True)
        update_all_teams_dict(res_by_non_avg, all_teams_dict, team, False)
        prog_bar.next()
    examples = []
    tags = []
    curr_examples = []
    prog_bar.finish()
    for team in all_teams_names:
        for fix in sorted(all_teams_dict[team]):
            if fix == 1 and all_teams_dict[team][fix] == []:
                continue
            curr_game = temp_DB[self.league].find_one({"GName": team, "Fix": fix, "Year": int(year)})
            if curr_game is None:
                continue
            if curr_game["HA"] == "home":
                vs_curr_game = temp_DB[self.league].find_one(
                    {"GName": curr_game["VS"], "VS": team, "HA": "away", "Year": int(year)})
                try:
                    vs_curr_fix = vs_curr_game["Fix"]
                except TypeError as e:
                    vs_curr_fix = fix + 1
                    all_teams_dict[curr_game["VS"]][vs_curr_fix] = []
                if all_teams_dict[curr_game["VS"]][vs_curr_fix] == []:
                    continue
                rel_all, rel_att, rel_def = relative_features(all_teams_dict[team][fix],
                                                              all_teams_dict[curr_game["VS"]][vs_curr_fix],
                                                              features_names)
                examples += [np.array(all_teams_dict[team][fix])
                             - np.array(all_teams_dict[curr_game["VS"]][vs_curr_fix])]
                examples[-1] = np.concatenate((examples[-1], [rel_all, rel_att, rel_def]))
                temp_dict = {"Ex": examples[-1], "Fix": curr_game["Fix"], "Res": curr_game["Result"],
                             "Home": team, "Away": curr_game["VS"], "League": self.league}
                curr_examples += [temp_dict]
                tags += [curr_game["Tag"]]
    if not current:
        return examples, tags
    else:
        return curr_examples, tags
output = ps.communicate()[0]

# compute how many bytes to expect in last block
nblocks = int(math.ceil(int(f['size']) / float(bsize)))
slack = nblocks * int(bsize) - int(f['size'])
actualbytes = int(bsize) - slack

# read slack space
tmp = open(tmpfile, "rb")
bcount = 0
hidden = ""
try:
    byte = tmp.read(1)
    while byte != "":
        bcount += 1
        if bcount > actualbytes and ord(byte) != 0x0:
            hidden += byte
        byte = tmp.read(1)
finally:
    tmp.close()
os.remove(tmpfile)

# use subdir to store result files
# create file named after inode and write hidden bytes
if hidden != "":
    if not os.path.exists(directory):
        os.makedirs(directory)
    f_out = open(directory + "/" + f["inode"], 'w')
    try:
        f_out.write(hidden)
    finally:
        f_out.close()
bar.next()
bar.finish()