def create_train_and_val_data(
        self,
        save_file_name: Union[None, str] = None
        ) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame, pd.DataFrame]:
    """Creates the training and validation sets and labels.

    The sets contain the top 20 ms2ds matches of each spectrum and a
    collection of different scores and data of these matches in a
    pd.DataFrame. The labels contain a dataframe with the tanimoto scores.

    Args
    ----
    save_file_name:
        File name to which the result will be stored. The result is stored
        as a pickled file of a tuple containing the training_set, the
        training_labels, the validation_set and the validation_labels in
        that order.
    """
    training_set, training_labels = \
        self.get_matches_info_and_tanimoto(self.training_spectra)
    validation_set, validation_labels = \
        self.get_matches_info_and_tanimoto(self.validation_spectra)
    if save_file_name:
        with open(save_file_name, "wb") as new_file:
            pickle.dump((training_set, training_labels,
                         validation_set, validation_labels),
                        new_file)
    return training_set, training_labels, validation_set, validation_labels
def dump(self, rv: Any) -> None:
    """Dump data to the cache as a pickle.

    :param rv: The arbitrary python object to dump
    """
    with open(self.path, "wb") as file:
        pickle.dump(rv, file, protocol=pickle.HIGHEST_PROTOCOL)
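# Hypothetical counterpart sketch to the cache dump above (not part of the
# original class): it assumes the same self.path attribute and simply reads
# the pickled object back from the cache file.
def load(self) -> Any:
    """Load the previously dumped object from the cache pickle.

    :return: The object stored by ``dump``
    """
    with open(self.path, "rb") as file:
        return pickle.load(file)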
def save(self, path):  # FilePath -> IO ()
    par = os.path.split(path)[0]
    if not os.path.exists(par):
        os.makedirs(par)
    f = gzip.open(path, 'wb')
    pickle.dump(self.db, f, -1)
    f.close()
def get_kNN_ml_data(self, name2save=None):
    train_jet_particle, test_jet_particle = self.prepare_ml_data()

    ## compute persistence information for b0 features based on kNN graphs
    train_b0_pair = {}
    for key in train_jet_particle:
        train_b0_pair[key] = topology.ML_JetPersistance().get_kNN_ml_inputs(
            utils.get_p4(train_jet_particle[key]), k=self.k, p=self.p)

    test_b0_pair = {}
    for key in test_jet_particle:
        test_b0_pair[key] = topology.ML_JetPersistance().get_kNN_ml_inputs(
            utils.get_p4(test_jet_particle[key]), k=self.k, p=self.p)

    ml_data = {
        'train_b0': train_b0_pair,
        'test_b0': test_b0_pair,
    }

    if not name2save:
        return ml_data
    else:
        with open(name2save, 'wb') as handle:
            pickle.dump(ml_data, handle, protocol=pickle.HIGHEST_PROTOCOL)
def init_mnist():
    download_mnist()
    dataset = _convert_numpy()
    print("Creating pickle file ...")
    with open(save_file, 'wb') as f:
        pickle.dump(dataset, f, -1)
    print("Done!")
def get_DT_ml_data(self, name2save=None):
    train_jet_particle, test_jet_particle = self.prepare_ml_data()

    ## compute persistence information
    train_b0_pair = {}
    train_b1_pair = {}
    for key in train_jet_particle:
        pers_pairs = topology.ML_JetPersistance().get_ml_inputs(
            utils.get_p4(train_jet_particle[key]),
            zeta_type=self.zeta_type, R=self.R)
        train_b0_pair[key], train_b1_pair[key] = pers_pairs['b0'], pers_pairs['b1']

    test_b0_pair = {}
    test_b1_pair = {}
    for key in test_jet_particle:
        pers_pairs = topology.ML_JetPersistance().get_ml_inputs(
            utils.get_p4(test_jet_particle[key]),
            zeta_type=self.zeta_type, R=self.R)
        test_b0_pair[key], test_b1_pair[key] = pers_pairs['b0'], pers_pairs['b1']

    ml_data = {
        'train_b0': train_b0_pair,
        'train_b1': train_b1_pair,
        'test_b0': test_b0_pair,
        'test_b1': test_b1_pair
    }

    if not name2save:
        return ml_data
    else:
        with open(name2save, 'wb') as handle:
            pickle.dump(ml_data, handle, protocol=pickle.HIGHEST_PROTOCOL)
def get_jet_obs(self, name2save=None):
    '''compute N-Subjettiness for N=1-6'''
    train_jet_particle, test_jet_particle = self.prepare_ml_data()

    train_obs, test_obs = {}, {}
    Ns = [1, 2, 3, 4, 5, 6]
    for key in train_jet_particle:
        taus = []
        for N in Ns:
            taus.append(utils.JetObs().Njettiness(
                utils.get_p4(train_jet_particle[key]), N=N, beta=0.2))
        train_obs[key] = np.vstack(taus).T

    for key in test_jet_particle:
        taus = []
        for N in Ns:
            taus.append(utils.JetObs().Njettiness(
                utils.get_p4(test_jet_particle[key]), N=N, beta=0.2))
        test_obs[key] = np.vstack(taus).T

    obs = {'train': train_obs, 'test': test_obs}

    if not name2save:
        return obs
    else:
        with open(name2save, 'wb') as handle:
            pickle.dump(obs, handle, protocol=pickle.HIGHEST_PROTOCOL)
def training_model(number_topics=10, number_words=1):
    print("Get Files")
    data = data_loader.get_processed_papers()
    data = pd.DataFrame(data, columns=['paper_id', 'title', 'abstract', 'body_text'])

    text_data = []
    for tokens in data['body_text']:
        for token in tokens:
            text_data.append(token)

    dictionary = corpora.Dictionary(text_data)
    corpus = [dictionary.doc2bow(text) for text in text_data]

    with open('corpus.pkl', 'wb') as corpus_file:
        pickle.dump(corpus, corpus_file)
    dictionary.save('dictionary.gensim')

    model = models.LdaModel(corpus, num_topics=number_topics,
                            id2word=dictionary, passes=15)
    model.save('model5.gensim')

    topics = model.print_topics(num_words=number_words)
    for topic in topics:
        print(topic)
def dump_list(self, file, *args):
    """
    dump_list(file, *args)

    Dumps lists to the file
    """
    for lists in args:
        pickle.dump(lists, file, 2)
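# A possible read-back counterpart to dump_list above (an illustrative sketch,
# not part of the original code): pickle.load() raises EOFError once every
# object written sequentially to the file has been consumed.
def load_list(self, file):
    """
    load_list(file)

    Reads back all objects previously written with dump_list
    """
    items = []
    while True:
        try:
            items.append(pickle.load(file))
        except EOFError:
            break
    return items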
def make_dataset(input_path, output_path, size=128, nb_points=10000,
                 number_models=None, overwrite=False, nb_samples_per_model=20,
                 gl_tries=5, fast_skip=False):
    dataset.utils.make_dir(output_path)
    objects_path = os.path.join(input_path, "*/models/*.obj")
    objects_path = glob.glob(objects_path)
    if number_models is not None:
        objects_path = objects_path[:number_models]

    for path in tqdm.tqdm(objects_path):
        name = path.split(os.path.sep)[-3]
        object_dir = os.path.join(output_path, name)
        if fast_skip and os.path.isdir(object_dir):
            continue
        dataset.utils.make_dir(object_dir)

        # sample a point cloud from the mesh and pickle it
        pts_path = os.path.join(object_dir, "pts.pkl")
        pts = dataset.geometry.sample_points(path, nb=nb_points)
        pts = np.array(pts, dtype=np.float32)
        with open(pts_path, 'wb') as handle:
            pickle.dump(pts, handle, protocol=pickle.HIGHEST_PROTOCOL)

        for i in range(nb_samples_per_model):
            render_name = f"render_{str(i).zfill(5)}"
            mat_name = f"mat_{str(i).zfill(5)}"
            image_path = os.path.join(object_dir, render_name + ".jpg")
            mat_path = os.path.join(object_dir, render_name + ".pkl")
            mat = dataset.geometry.random_camera()
            if not overwrite and os.path.isfile(mat_path):
                continue
            # retry rendering a few times on transient GL errors
            for gl_try in range(gl_tries):
                try:
                    color = dataset.rendering.render(path, mat, im_size=size)
                    im = Image.fromarray(color)
                    im.save(image_path)
                    with open(mat_path, 'wb') as handle:
                        pickle.dump(mat, handle, protocol=pickle.HIGHEST_PROTOCOL)
                    break
                except OpenGL.error.GLError:
                    print(f"GL Error occurred, try {gl_try}, trying again.")
                except ValueError:
                    print(f"Value error with {name}")
                    break
                except RuntimeError:
                    print(f"Runtime error with {name}")
                    break
                except TypeError:
                    print(f"Type error with {name}")
                    break
            else:
                pass
def login_handler(self, remember=True, cookies=True):
    if self.driver.current_url == self.dash_board_url:
        pass
    else:
        self.driver.get(self.login_url)
        try:
            # try to reuse previously saved cookies
            with open("cookies", "rb") as f:
                saved_cookies = pickle.load(f)
            for cookie in saved_cookies:
                self.driver.add_cookie(cookie)
            self.driver.refresh()
        except Exception as e:
            # fall back to a manual login with credentials
            user_email = self.__get_xpath_elem(
                user["user-email-field"]).send_keys(self.user_mail)
            user_password = self.__get_xpath_elem(
                user["user-password-field"]).send_keys(self.user_pswd)
            if remember:
                user_remember_me = self._click(user["user-remember-me"])
            self._click(user["user-login-btn"])
            self._random_wait(3, 5)
            if cookies:
                if self.driver.current_url == self.dash_board_url:
                    with open("cookies", "wb") as f:
                        pickle.dump(self.driver.get_cookies(), f)
def update_skills_pickle(
        from_table: str = "skills.tab", pickle_out: str = "skills.pickle"
) -> None:
    """
    First import, then serialize and store skills data,
    so that it is not hard coded here.
    """
    if os.path.exists(os.path.join(_TABLES_LOCATION, pickle_out)):
        if os.path.getmtime(
            os.path.join(_TABLES_LOCATION, from_table)
        ) < os.path.getmtime(os.path.join(_TABLES_LOCATION, pickle_out)):
            return

    skills_d = {'groups': []}
    path = os.path.join(_TABLES_LOCATION, from_table)
    with open(path) as handle:
        table = handle.read().splitlines()

    for row in table:
        if row.lstrip()[0] == '#':
            continue
        row = row.split('\t')
        name = row[0]
        group = row[1] if len(row) > 1 else None
        group = group if group != "" else None
        difficulty = int(row[2]) if len(row) > 2 else 0
        skills_d[name] = {'group': group, 'difficulty': difficulty}
        skills_d['groups'].append(group)

    with open(os.path.join(_TABLES_LOCATION, pickle_out), "wb") as handle:
        pickle.dump(skills_d, handle, protocol=pickle.HIGHEST_PROTOCOL)
def get_credentials(logger: lg.Logger = None) -> pickle:
    """Get the proper credentials needed to write to the Google spreadsheet."""
    creds = None
    if osp.exists(GGL_SHEETS_TOKEN):
        if logger:
            logger.info(F"osp.exists({GGL_SHEETS_TOKEN})")
        with open(GGL_SHEETS_TOKEN, "rb") as token:
            creds = pickle.load(token)

    # if there are no (valid) credentials available, let the user log in.
    if not creds or not creds.valid:
        if logger:
            logger.info("creds is None or not creds.valid")
        if creds and creds.expired and creds.refresh_token:
            creds.refresh(Request())
            if logger:
                logger.debug("creds.refresh(Request())")
        else:
            flow = InstalledAppFlow.from_client_secrets_file(
                CREDENTIALS_FILE, SHEETS_RW_SCOPE)
            creds = flow.run_local_server()
            if logger:
                logger.debug("creds = flow.run_local_server()")
        # save the credentials for the next run
        with open(GGL_SHEETS_TOKEN, "wb") as token:
            if logger:
                logger.debug("pickle.dump()")
            pickle.dump(creds, token, pickle.HIGHEST_PROTOCOL)

    return creds
def all_double_transforms_combo_60_66(testing_flag, transformation_indices, lr, batch_size, epoch_number):
    '''For all 49 combinations of 2 transformations, return a list of f1_macro and micro scores.
    This function will take transforms 60 to 66
    '''
    transformation_metrics = {}
    transformation_confidences = {}
    transformation_percentages = {}
    transformation_macro_at_50_percent = {}

    start_time = datetime.datetime.now()
    start_time_str = start_time.strftime("%Y%m%d-%H%M%S")

    path_to_embeddings = os.path.join(os.getcwd(), "embeddings", "chapman", "cardiac", "simclr")
    try:
        if not os.path.exists(path_to_embeddings):
            os.makedirs(path_to_embeddings)
    except OSError as err:
        print(err)

    path_to_training_percentages = os.path.join(os.getcwd(), "training_percentage", "chapman", "cardiac", "simclr")
    try:
        if not os.path.exists(path_to_training_percentages):
            os.makedirs(path_to_training_percentages)
    except OSError as err:
        print(err)

    for i in [6]:
        for j in [0, 1, 2, 3, 4, 5, 6]:
            transformation_indices = [i, j]
            string_indices = "".join([str(num) for num in transformation_indices])
            save_name = f'{start_time_str}_testing-{testing_flag}_bs-{batch_size}_transformations-{string_indices}_lr-{lr}'

            np.random.seed(7)
            user_datasets, patient_to_rhythm_dict, test_train_split_dict, working_directory = get_datasets_from_paths(testing_flag)
            np_train_data, train_labels, train_labels_dict, np_test_data, test_labels, test_labels_dict = create_train_test_datasets(user_datasets, patient_to_rhythm_dict, test_train_split_dict, working_directory)
            trained_simclr_model, epoch_losses = train_simclr(testing_flag, np_train_data, transformation_indices=transformation_indices, lr=lr, batch_size=batch_size, epoch_number=epoch_number)
            metrics, middle_ovo, half_ovo, mean_ovo, std_ovo, middle_ovr, half_ovr, mean_ovr, std_ovr, percentages, auc_ovo_scores, auc_ovr_scores, embeddings_data = downstream_evaluation(trained_simclr_model, np_train_data, np_test_data, train_labels, test_labels)
            print(metrics)
            print_auc_intervals(middle_ovo, half_ovo, mean_ovo, std_ovo, middle_ovr, half_ovr, mean_ovr, std_ovr)

            # store to dictionary
            transformation_metrics[string_indices] = metrics
            transformation_confidences[string_indices] = [middle_ovo, half_ovo, mean_ovo, std_ovo, middle_ovr, half_ovr, mean_ovr, std_ovr]
            transformation_percentages[string_indices] = [percentages, auc_ovo_scores, auc_ovr_scores]

            macro_50_percent_data = auc_ovr_scores[4]
            if macro_50_percent_data:
                transformation_macro_at_50_percent[string_indices] = macro_50_percent_data
            else:
                transformation_macro_at_50_percent[string_indices] = mean_ovr

    save_data = [transformation_metrics, transformation_confidences, transformation_percentages, transformation_macro_at_50_percent]

    path_to_double_transforms = os.path.join(os.getcwd(), "embeddings", "double_transforms")
    if not os.path.exists(path_to_double_transforms):
        os.makedirs(path_to_double_transforms)

    save_name = f'{start_time_str}_testing-{testing_flag}_bs-{batch_size}_lr-{lr}_chapman_cardiac-00-16.pickle'
    save_path = os.path.join(path_to_double_transforms, save_name)
    print(f'all double transform path: {save_path}')
    with open(save_path, 'wb') as f:
        pickle.dump(save_data, f)
def save(self, file=None, v=1):
    '''save this object'''
    if not file:
        file = os.path.join(self.path, self.name + '.pkl')
    with open(file, 'wb') as out:
        pickle5.dump(self, out, pickle5.HIGHEST_PROTOCOL)
    if v:
        print(colors.green + "object saved\n" + colors.yellow + file + colors.black)
def save_subscriptions(self, subs):
    """Save subscribers to file."""
    sub_info = {
        'last_update': datetime.now(timezone.utc),
        'subscriptions': subs}
    with open(self.settings.subs_file, 'wb') as fp:
        pickle.dump(sub_info, fp, pickle.HIGHEST_PROTOCOL)
def save_last_run(self, found_videos):
    """Save 'last run' info (the current time and the videos found) to file."""
    last_run = {
        'last_run': datetime.now(timezone.utc),
        'found_videos': found_videos}
    with open(self.settings.last_run_file, 'wb') as fp:
        pickle.dump(last_run, fp, pickle.HIGHEST_PROTOCOL)
def get_data(train=True):
    # feats = cPickle.load(open(coco_inception_features_path, "rb"), encoding="latin1")
    feats = cPickle.load(open('../data/coco_train_v3.pik', "rb"), encoding="latin1")
    feats.update(
        cPickle.load(open('../data/coco_val_ins.pik', "rb"), encoding="latin1"))

    sents = []
    final_feats = []
    filenames = []
    js = json.load(open(coco_dataset_path, "r"))
    for i, img in enumerate(js["images"]):
        if train and img["extrasplit"] == "val":
            continue
        if (not train) and img["extrasplit"] != "val":
            continue
        if img["filename"] not in feats:
            continue
        if train:
            for sen in img["sentences"]:
                sents.append(sen["rm_style_tokens"])
                final_feats.append(feats[img["filename"]])
                filenames.append(img["filename"])
        else:
            sents.append(img["sentences"][0]["rm_style_tokens"])
            final_feats.append(feats[img["filename"]])
            filenames.append(img["filename"])

    final_feats = np.array(final_feats)

    data_file = 'cleaned_sents_train.pkl' if train is True else 'cleaned_test_train.pkl'
    if os.path.exists(data_file):
        with open(data_file, 'rb') as f:
            sents = cPickle.load(f)
    else:
        m = []
        sym_spell = SymSpell(max_dictionary_edit_distance=2, prefix_length=3)
        dictionary_path = pkg_resources.resource_filename(
            "symspellpy", "frequency_dictionary_en_82_765.txt")
        # term_index is the column of the term and count_index is the
        # column of the term frequency
        sym_spell.load_dictionary(dictionary_path, term_index=0, count_index=1)
        for i in tqdm(sents, position=0):
            l = []
            for j in range(len(i)):
                t = correct_spell(
                    i[j].replace('NOUNNOUNNOUN', '').replace(
                        "PARTPARTPART", "").replace("FRAMENET", "").replace(
                        "ADJADJADJ", "").replace('INTJINTJINTJ', '').lower(),
                    sym_spell)
                l.append(t)
            m.append(l)
        sents = m
        with open(data_file, 'wb') as f:
            cPickle.dump(sents, f)

    return final_feats, filenames, sents
def save_dest_playlist(self, pl_id, pl_name):
    """Save playlists to file."""
    pl_info = {
        'last_update': datetime.now(timezone.utc),
        'name': pl_name,
        'id': pl_id}
    with open(self.settings.dest_pl_file, 'wb') as fp:
        pickle.dump(pl_info, fp, pickle.HIGHEST_PROTOCOL)
def dump(self, filename):
    # dump state of machine into a file
    d = dict()
    d["memory"] = self.memory
    d["stack"] = self.stack
    d["register"] = self.register
    d["xptr"] = self.exec_ptr
    with open(filename + '.pkl', 'wb+') as f:
        pickle.dump(d, f, pickle.HIGHEST_PROTOCOL)
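# Illustrative counterpart sketch (not part of the original machine class),
# assuming the same attribute names written by dump() above: restore the
# machine state from the pickled file.
def load(self, filename):
    # read state of machine back from the file produced by dump()
    with open(filename + '.pkl', 'rb') as f:
        d = pickle.load(f)
    self.memory = d["memory"]
    self.stack = d["stack"]
    self.register = d["register"]
    self.exec_ptr = d["xptr"]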
def downstream_evaluation_ms(byol_encoder, test_dataset, train_dataset, segment_number, path_to_embeddings, save_name):
    train_loader = DataLoader(train_dataset, batch_size=len(train_dataset), num_workers=28)
    test_loader = DataLoader(test_dataset, batch_size=len(test_dataset), num_workers=28)

    for data_label in train_loader:
        data, label = data_label
        segment_size = data.size()[2] // segment_number
        split = torch.split(data, segment_size, dim=2)
        split = split[:segment_number]
        encoded_split_numpy = [byol_encoder(split_data.float()).detach().numpy() for split_data in split]
        # take mean of encoded segments
        byol_train_numpy_x = np.mean(np.array(encoded_split_numpy), axis=0)
        byol_train_numpy_y = label.detach().numpy()

    X_train, y_train = byol_train_numpy_x, byol_train_numpy_y

    for data_label in test_loader:
        data, label = data_label
        segment_size = data.size()[2] // segment_number
        split = torch.split(data, segment_size, dim=2)
        split = split[:segment_number]
        encoded_split_numpy = [byol_encoder(split_data.float()).detach().numpy() for split_data in split]
        # take mean of encoded segments
        byol_test_numpy_x = np.mean(np.array(encoded_split_numpy), axis=0)
        byol_test_numpy_y = label.detach().numpy()

    X_test, y_test = byol_test_numpy_x, byol_test_numpy_y

    save_name = f'{save_name}-train_test.pickle'
    save_path = os.path.join(path_to_embeddings, save_name)
    with open(save_path, 'wb') as f:
        data = [X_train, X_test, y_train, y_test]
        pickle.dump(data, f)

    log_reg_clf = LogisticRegression(multi_class='multinomial', solver='lbfgs')
    log_reg_clf.fit(X_train, y_train)
    y_pred = log_reg_clf.predict(X_test)

    averages = ['micro', 'macro']
    metrics = {}
    for average in averages:
        f1 = f1_score(y_test, y_pred, average=average)
        precision = precision_score(y_test, y_pred, average=average)
        recall = recall_score(y_test, y_pred, average=average)
        metrics[f'f1_{average}'] = f1
        metrics[f'precision_{average}'] = precision
        metrics[f'recall_{average}'] = recall

    accuracy = accuracy_score(y_test, y_pred)
    metrics['accuracy'] = accuracy

    middle_macro, half_macro, mean_macro, std_macro, middle_micro, half_micro, mean_micro, std_micro = get_confidence_interval_f1_micro_macro(y_test, y_pred)
    percentages, f1_macro_scores, f1_micro_scores = different_percentage_training(X_train, X_test, y_train, y_test)

    return metrics, middle_macro, half_macro, mean_macro, std_macro, middle_micro, half_micro, mean_micro, std_micro
def to_bytes_gz(graph: BELGraph, protocol: int = pickle.HIGHEST_PROTOCOL) -> bytes:
    """Convert a graph to gzipped bytes with pickle.

    :param graph: A BEL graph
    :param protocol: Pickling protocol to use. Defaults to ``HIGHEST_PROTOCOL``.
    """
    io = BytesIO()
    with gzip.open(io, mode='wb') as file:
        pickle.dump(graph, file, protocol=protocol)
    return io.getvalue()
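# A possible inverse of to_bytes_gz (an illustrative sketch under the same
# imports; the name from_bytes_gz is an assumption, not confirmed from the
# source): decompress the gzipped payload and unpickle the graph.
def from_bytes_gz(data: bytes) -> BELGraph:
    """Load a BEL graph from gzipped pickle bytes produced by ``to_bytes_gz``."""
    with gzip.open(BytesIO(data), mode='rb') as file:
        return pickle.load(file)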
def save_model(self, path: str) -> None:
    weights_to_save = {}
    for variable in self.sess.graph.get_collection(tf.GraphKeys.GLOBAL_VARIABLES):
        assert variable.name not in weights_to_save
        weights_to_save[variable.name] = self.sess.run(variable)

    data_to_save = {"params": self.params, "weights": weights_to_save}

    with open(path, 'wb') as out_file:
        pickle.dump(data_to_save, out_file, pickle.HIGHEST_PROTOCOL)
def save_train_test_embeddings(X_train, X_test, y_train, y_test, path_to_embeddings, save_name):
    start_time = datetime.datetime.now()
    start_time_str = start_time.strftime("%Y%m%d-%H%M%S")
    print(f'Starting to save train test embeddings: {start_time_str}')
    save_name = f'{save_name}-{start_time_str}.pickle'
    save_path = os.path.join(path_to_embeddings, save_name)
    print(f'path to embeddings: {save_path}')
    with open(save_path, 'wb') as f:
        data = [X_train, X_test, y_train, y_test]
        pickle.dump(data, f)
def exit_handler(self):
    print(f"R/W? - {self.read_write_mode}")
    if self.read_write_mode == READ_WRITE:
        # create directory if it doesn't exist
        self.lib_file_path.parent.mkdir(parents=True, exist_ok=True)
        print(f'PICKLING before EXIT: {self.lib_file_path}')
        print(f"SIZE: {len(self.media_files.keys())}")
        pprint(self.media_files.keys())
        print(" - - ")
        with open(self.lib_file_path, 'wb') as f:
            pickle.dump(self.media_files, f, pickle.HIGHEST_PROTOCOL)
def save(self, path: str) -> None:
    """
    Dump the tree to a pickle file.
    The .pkl file extension should be specified, e.g.:

        tree.save("path.pkl")
    """
    assert (self.tree), "No tree created for this class instance"
    with open(path, 'wb') as handle:
        pickle.dump(self.tree, handle)
def create_model(X, y, model_filename, classifier_type):
    """
    Builds the classifier model
    :param X: Features
    :param y: Labels
    :param model_filename: the file where to save the model
    :param classifier_type: svm/rf
    :return:
    """
    # split dataset
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)
    print('Training Classifier..')
    if classifier_type == 'svm':
        # creates SVM model and fits it on training samples
        clf = make_pipeline(StandardScaler(), SVC(gamma='auto'))
        clf.fit(X_train, y_train)
        model = clf
        # stores the classifier in pickle file
        with open(model_filename, 'wb') as model_file:
            pickle.dump(model, model_file)
        # predict for test samples
        y_pred = clf.predict(X_test)
        # predict for training samples
        y_pred_train = clf.predict(X_train)
    else:
        # creates Random Forest model and fits it on training samples
        rf = RandomForestClassifier(n_estimators=50, criterion='gini', max_depth=40, max_features=11)
        rf.fit(X_train, y_train)
        model = rf
        # stores the classifier in pickle file
        with open(model_filename, 'wb') as model_file:
            pickle.dump(model, model_file)
        # predict for test samples
        y_pred = rf.predict(X_test)
        # predict for training samples
        y_pred_train = rf.predict(X_train)

    # Print accuracy score
    print("Test Accuracy score:", metrics.accuracy_score(y_test, y_pred))
    print("Train Accuracy score:", metrics.accuracy_score(y_train, y_pred_train))
def build_vocab(self, min_freq=0, max_freq=sys.maxsize):
    """
    build vocab + add eos
    encode sentence
    """
    with open(os.path.join(self.data_dir, 'train.txt'), 'r') as fn:
        data = fn.readlines()

    if 'lambada' in self.data_dir:
        with open(os.path.join(self.data_dir, 'test.txt'), 'r') as fn:
            data.extend(fn.readlines())
        with open(os.path.join(self.data_dir, 'valid.txt'), 'r') as fn:
            data.extend(fn.readlines())

    print('building vocab ...')
    self.vocab = defaultdict(int)
    self.tok2id = {}
    self.id2tok = []

    for line in tqdm(data):
        line = line.strip().split()
        for tok in line:
            self.vocab[tok] += 1

    self.vocab = {
        a: self.vocab[a]
        for a in self.vocab
        if self.vocab[a] >= min_freq and self.vocab[a] <= max_freq
    }

    # sort vocab in case of using adaptive softmax
    self.vocab = list(
        sorted(self.vocab.items(), key=lambda a: a[1], reverse=True))
    print(self.vocab[:10])

    if 'lambada' in self.data_dir:
        self.vocab = self.vocab[:60000]
        self.vocab.append(('<unk>', 0))

    self.id2tok = ['<pad>'] + ['<eos>'] + [a[0] for a in self.vocab]
    self.tok2id = {a: i for i, a in enumerate(self.id2tok)}
    self.vocab_size = len(self.id2tok)

    print('end building vocab ...')
    print('vocab size', len(self.tok2id))

    with open(os.path.join(self.data_dir, 'vocab.pkl'), 'wb') as fn:
        pickle.dump(
            {
                'id2tok': self.id2tok,
                'tok2id': self.tok2id,
                'vocab_size': self.vocab_size
            }, fn)
def build_fasttext(self, word_dict):
    # create word_vec with fastText vectors
    if not os.path.exists('word_vec.pkl'):
        ft = fasttext.load_model('cc.sv.300.bin')
        for word in tqdm(word_dict):
            vec = np.array(ft.get_word_vector(word))
            vec = vec / np.sqrt(np.sum(np.power(vec, 2)))
            self.word_vec[word] = vec
        with open('word_vec.pkl', 'wb') as f:
            pickle.dump(self.word_vec, f, pickle.HIGHEST_PROTOCOL)
    else:
        with open('word_vec.pkl', 'rb') as f:
            self.word_vec = pickle.load(f)
def make_api_calls(self):
    """
    Make API calls to get and store data ahead of the game.
    """
    # Get Tweet counts for each user
    with open(f"{self.parent_dir}/data/tweet_counts_{self.uuid}.txt", "wb") as data_file:
        # Initiate empty dictionary
        counts_data = {}
        # Get number of Tweets for each user
        search_counts = Search_Counts(self.auth)
        for i, user in enumerate(self.users):
            # Make API call
            response = search_counts(f"from:{user[1:]} -is:retweet")
            if response.status_code == 200:
                response = response.text
                parsed = json.loads(response)
                counts_data[user] = parsed["totalCount"]
            else:
                return False, response.status_code
        pickle.dump(counts_data, data_file)

    # Get recent Tweets for each user
    with open(f"{self.parent_dir}/data/recent_search_{self.uuid}.txt", "wb") as data_file:
        # Initiate empty dictionary
        tweet_data = {}
        # Get Tweets for each user
        recent_search_data = Recent_Search_Data(self.auth)
        for i, user in enumerate(self.users):
            # Make API call
            response = recent_search_data(f"from:{user[1:]} -is:retweet")
            if response.status_code == 200:
                response = response.text
                parsed = json.loads(response)
                # Twitter Labs endpoint subject to change
                try:
                    tweet_data[user] = [
                        tweet["text"] for tweet in parsed["data"]
                    ]
                except:
                    tweet_data[user] = None
            else:
                return False, response.status_code
        pickle.dump(tweet_data, data_file)

    # Store paths to wordcloud images
    self.wordcloud_paths = self.make_wordclouds()

    return True, 200