def main():
    # == == == == == == == == Part 1: Set up experiment parameters == == == == == == == == #
    total_start_time = time.time()

    data_prefix = '../data/'
    forecast_filepath = 'vevo_forecast_data_60k.tsv'
    recsys_dirpath = 'recsys'
    snapshot_dirpath = 'network_pickle'
    if not os.path.exists(os.path.join(data_prefix, snapshot_dirpath)):
        os.mkdir(os.path.join(data_prefix, snapshot_dirpath))

    # == == == == == == Part 2: Load Vevo en videos 61k dataset == == == == == == #
    vid_embed_dict = {}
    vid_view_dict = {}
    with open(os.path.join(data_prefix, forecast_filepath), 'r') as fin:
        for line in fin:
            embed, vid, ts_view, total_view = line.rstrip().split('\t')
            vid_embed_dict[vid] = int(embed)
            vid_view_dict[vid] = np.array(intify(ts_view.split(',')))
    vevo_en_vid_list = sorted(vid_embed_dict.keys())
    num_videos = len(vevo_en_vid_list)
    # membership is tested for every recsys target, so use a set rather than a list
    vevo_en_vid_set = set(vevo_en_vid_list)

    for t in range(T):
        timer = Timer()
        timer.start()

        target_date_str = obj2str(datetime(2018, 9, 1) + timedelta(days=t))
        recsys_filepath = 'recsys_{0}.json'.format(target_date_str)
        snapshot_filepath = 'network_{0}.p'.format(target_date_str)

        network_mat = {embed: [] for embed in range(num_videos)}
        with open(os.path.join(data_prefix, recsys_dirpath, recsys_filepath), 'r') as fin:
            for line in fin:
                network_json = json.loads(line.rstrip())
                source = network_json['vid']
                targets = network_json['relevant_list'][: MAX_POSITION]
                for position, target in enumerate(targets):
                    if target in vevo_en_vid_set:
                        # for the target video, record (embed of the source video,
                        # target's position on the source's relevant list,
                        # source's view count on day t)
                        network_mat[vid_embed_dict[target]].append(
                            (vid_embed_dict[source], position, vid_view_dict[source][t]))

        with open(os.path.join(data_prefix, snapshot_dirpath, snapshot_filepath), 'wb') as fout:
            pickle.dump(network_mat, fout)

        print('>>> Finish dumping date {0}'.format(target_date_str))
        timer.stop()

    print('>>> Network structure has been dumped!')
    print('>>> Total elapsed time: {0}\n'.format(
        str(timedelta(seconds=time.time() - total_start_time))[:-3]))
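# -- Hedged sketches of two helpers the function above relies on. In the
# original repository they live in a shared utils module; the bodies below
# are minimal assumptions for illustration, not the canonical implementations.
def intify(str_list):
    """Convert the comma-split daily view strings from the TSV into ints."""
    return [int(x) for x in str_list]


def obj2str(dt_obj, fmt='youtube'):
    """Render a datetime as the YYYY-MM-DD form used in snapshot filenames."""
    return dt_obj.strftime('%Y-%m-%d')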
def _extract_tweet(self, filequeue):
    while not filequeue.empty():
        filepath = filequeue.get()
        try:
            filedata = bz2.BZ2File(filepath, mode='r')
        except Exception:
            self.logger.warning('Exists non-bz2 file {0} in dataset folder'.format(filepath))
            continue
        filename, filetype = os.path.basename(os.path.normpath(filepath)).split('.')
        tweet_output = bz2.open(os.path.join(self.tweet_stats_path, '{0}.bz2'.format(filename)), 'at')
        user_output = bz2.open(os.path.join(self.user_stats_path, '{0}.bz2'.format(filename)), 'at')

        visited_user_ids = set()
        # the try wraps the iteration because a truncated bz2 stream raises
        # EOFError from the iterator itself, not from the loop body
        try:
            for line in filedata:
                if not line.rstrip():
                    continue
                tweet_json = json.loads(line)

                # 3. ratemsg: timestamp_ms, track
                if 'limit' in tweet_json:
                    # rate limit message, e.g.
                    # {"limit": {"track": 283540, "timestamp_ms": "1483189188944"}}
                    tweet_output.write('{0},{1},{2}\n'.format(
                        'ratemsg',
                        tweet_json['limit']['timestamp_ms'],
                        tweet_json['limit']['track']))
                    continue

                if 'id_str' not in tweet_json:
                    continue

                # 1. tweet fields, written in this order:
                #    tweet_id_str, created_at, timestamp_ms, user_id_str,
                #    original_lang, retweeted_lang, quoted_lang,
                #    original_vids, retweeted_vids, quoted_vids,
                #    original_mentions, retweeted_mentions, quoted_mentions,
                #    original_hashtags, retweeted_hashtags, quoted_hashtags,
                #    original_geoname, retweeted_geoname, quoted_geoname,
                #    original_countrycode, retweeted_countrycode, quoted_countrycode,
                #    original_filter, retweeted_filter, quoted_filter,
                #    original_retweet_count, retweeted_retweet_count, quoted_retweet_count,
                #    original_favorite_count, retweeted_favorite_count, quoted_favorite_count,
                #    original_user_followers_count, retweeted_user_followers_count, quoted_user_followers_count,
                #    original_user_friends_count, retweeted_user_friends_count, quoted_user_friends_count,
                #    original_user_statuses_count, retweeted_user_statuses_count, quoted_user_statuses_count,
                #    original_user_favourites_count, retweeted_user_favourites_count, quoted_user_favourites_count,
                #    reply_tweet_id_str, retweeted_tweet_id_str, quoted_tweet_id_str,
                #    reply_user_id_str, retweeted_user_id_str, quoted_user_id_str,
                #    original_text, retweeted_text, quoted_text
                tweet_id = tweet_json['id_str']
                created_at = obj2str(str2obj(tweet_json['created_at'], fmt='tweet'), fmt='youtube')
                timestamp_ms = tweet_json['timestamp_ms']
                user_id_str = tweet_json['user']['id_str']

                lang = tweet_json.get('lang', 'N')

                original_vids, retweeted_vids, quoted_vids = self._extract_vids(tweet_json)
                original_mentions, retweeted_mentions, quoted_mentions = self._extract_mentions(tweet_json)
                original_hashtags, retweeted_hashtags, quoted_hashtags = self._extract_hashtags(tweet_json)

                if tweet_json['place'] is not None:
                    original_geo = self._replace_comma_space(tweet_json['place']['full_name'])
                    original_cc = self._replace_comma_space(tweet_json['place']['country_code'])
                else:
                    original_geo = 'N'
                    original_cc = 'N'

                original_filter = tweet_json['filter_level']
                original_retweet_count = tweet_json['retweet_count']
                original_favorite_count = tweet_json['favorite_count']
                original_user_followers_count = tweet_json['user']['followers_count']
                original_user_friends_count = tweet_json['user']['friends_count']
                original_user_statuses_count = tweet_json['user']['statuses_count']
                original_user_favourites_count = tweet_json['user']['favourites_count']

                reply_tweet_id_str = self._replace_with_nan(tweet_json['in_reply_to_status_id_str'])
                reply_user_id_str = self._replace_with_nan(tweet_json['in_reply_to_user_id_str'])

                if 'extended_tweet' in tweet_json and 'full_text' in tweet_json['extended_tweet']:
                    text = self._replace_comma_space(tweet_json['extended_tweet']['full_text'])
                elif tweet_json['text'] is not None:
                    text = self._replace_comma_space(tweet_json['text'])
                else:
                    text = 'N'

                retweeted_tweet_id_str, retweeted_user_id_str, retweeted_user_location, \
                    retweeted_lang, retweeted_geo, retweeted_cc, retweeted_filter, \
                    retweeted_retweet_count, retweeted_favorite_count, retweeted_user_followers_count, \
                    retweeted_user_friends_count, retweeted_user_statuses_count, retweeted_user_favourites_count, \
                    retweeted_text = self._extract_entities(tweet_json, 'retweeted_status')

                quoted_tweet_id_str, quoted_user_id_str, quoted_user_location, \
                    quoted_lang, quoted_geo, quoted_cc, quoted_filter, \
                    quoted_retweet_count, quoted_favorite_count, quoted_user_followers_count, \
                    quoted_user_friends_count, quoted_user_statuses_count, quoted_user_favourites_count, \
                    quoted_text = self._extract_entities(tweet_json, 'quoted_status')

                tweet_output.write(
                    '{0},{1},{2},{3},'
                    '{4},{5},{6},'
                    '{7},{8},{9},'
                    '{10},{11},{12},'
                    '{13},{14},{15},'
                    '{16},{17},{18},'
                    '{19},{20},{21},'
                    '{22},{23},{24},'
                    '{25},{26},{27},'
                    '{28},{29},{30},'
                    '{31},{32},{33},'
                    '{34},{35},{36},'
                    '{37},{38},{39},'
                    '{40},{41},{42},'
                    '{43},{44},{45},'
                    '{46},{47},{48},'
                    '{49},{50},{51}\n'.format(
                        tweet_id, created_at, timestamp_ms, user_id_str,
                        lang, retweeted_lang, quoted_lang,
                        strify(original_vids, delimiter=';'),
                        strify(retweeted_vids, delimiter=';'),
                        strify(quoted_vids, delimiter=';'),
                        strify(original_mentions, delimiter=';'),
                        strify(retweeted_mentions, delimiter=';'),
                        strify(quoted_mentions, delimiter=';'),
                        strify(original_hashtags, delimiter=';'),
                        strify(retweeted_hashtags, delimiter=';'),
                        strify(quoted_hashtags, delimiter=';'),
                        original_geo, retweeted_geo, quoted_geo,
                        original_cc, retweeted_cc, quoted_cc,
                        original_filter, retweeted_filter, quoted_filter,
                        original_retweet_count, retweeted_retweet_count, quoted_retweet_count,
                        original_favorite_count, retweeted_favorite_count, quoted_favorite_count,
                        original_user_followers_count, retweeted_user_followers_count, quoted_user_followers_count,
                        original_user_friends_count, retweeted_user_friends_count, quoted_user_friends_count,
                        original_user_statuses_count, retweeted_user_statuses_count, quoted_user_statuses_count,
                        original_user_favourites_count, retweeted_user_favourites_count, quoted_user_favourites_count,
                        reply_tweet_id_str, retweeted_tweet_id_str, quoted_tweet_id_str,
                        reply_user_id_str, retweeted_user_id_str, quoted_user_id_str,
                        text, retweeted_text, quoted_text))

                # 2. user fields: user_id_str, screen_name, created_at, verified, location,
                #    followers_count, friends_count, listed_count, statuses_count, description
                if user_id_str not in visited_user_ids:
                    (user_screen_name, user_created_at, user_verified, user_location,
                     user_followers_count, user_friends_count, user_listed_count,
                     user_statuses_count, user_description) = self._extract_user_entities(tweet_json['user'])
                    user_output.write('{0},{1},{2},{3},{4},{5},{6},{7},{8},{9}\n'.format(
                        user_id_str, user_screen_name, user_created_at, user_verified,
                        user_location, user_followers_count, user_friends_count,
                        user_listed_count, user_statuses_count, user_description))
                    # the set stores user ids, so add the id here (the original
                    # added the screen name, which made this check always miss)
                    visited_user_ids.add(user_id_str)

                if 'retweeted_status' in tweet_json:
                    ruser_id_str = tweet_json['retweeted_status']['user']['id_str']
                    if ruser_id_str not in visited_user_ids:
                        (ruser_screen_name, ruser_created_at, ruser_verified, ruser_location,
                         ruser_followers_count, ruser_friends_count, ruser_listed_count,
                         ruser_statuses_count, ruser_description) = self._extract_user_entities(
                            tweet_json['retweeted_status']['user'])
                        user_output.write('{0},{1},{2},{3},{4},{5},{6},{7},{8},{9}\n'.format(
                            ruser_id_str, ruser_screen_name, ruser_created_at, ruser_verified,
                            ruser_location, ruser_followers_count, ruser_friends_count,
                            ruser_listed_count, ruser_statuses_count, ruser_description))
                        visited_user_ids.add(ruser_id_str)

                if 'quoted_status' in tweet_json:
                    quser_id_str = tweet_json['quoted_status']['user']['id_str']
                    if quser_id_str not in visited_user_ids:
                        (quser_screen_name, quser_created_at, quser_verified, quser_location,
                         quser_followers_count, quser_friends_count, quser_listed_count,
                         quser_statuses_count, quser_description) = self._extract_user_entities(
                            tweet_json['quoted_status']['user'])
                        user_output.write('{0},{1},{2},{3},{4},{5},{6},{7},{8},{9}\n'.format(
                            quser_id_str, quser_screen_name, quser_created_at, quser_verified,
                            quser_location, quser_followers_count, quser_friends_count,
                            quser_listed_count, quser_statuses_count, quser_description))
                        visited_user_ids.add(quser_id_str)
        except EOFError:
            self.logger.error(
                'EOFError: {0} ended before the logical end-of-stream was detected.'.format(filename))

        tweet_output.close()
        user_output.close()
        filedata.close()
        self.logger.debug('{0} done!'.format(filename))
        print('{0} done!'.format(filename))
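# -- Hedged sketches of the sanitising helpers used by _extract_tweet. The
# output rows are comma-delimited, so free-text fields must not contain raw
# commas or line breaks, and nullable ids are stored as the 'N' placeholder
# used for the other fields. These bodies are assumptions for illustration;
# the repository's own implementations may differ.
def _replace_comma_space(self, text):
    """Make a value safe for one CSV field: drop commas, collapse whitespace."""
    if not text:
        return 'N'
    return ' '.join(str(text).replace(',', ' ').split())


def _replace_with_nan(self, value):
    """Map Twitter's null reply ids to the 'N' placeholder."""
    return 'N' if value is None else value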
def main():
    # == == == == == == Part 1: Set up environment == == == == == == #
    timer = Timer()
    timer.start()

    data_prefix = '../data/'

    # == == == == == == Part 2: Load video views == == == == == == #
    data_loader = DataLoader()
    data_loader.load_video_views()
    embed_avg_view_dict = data_loader.embed_avg_view_dict
    num_videos = data_loader.num_videos

    data_loader.load_embed_content_dict()
    embed_cid_dict = data_loader.embed_cid_dict
    embed_genre_dict = data_loader.embed_genre_dict

    # == == == == == == Part 3: Load dynamic network snapshots == == == == == == #
    network_dict_list = []
    for t in range(T):
        target_date_str = obj2str(datetime(2018, 9, 1) + timedelta(days=t))
        filename = 'network_{0}.p'.format(target_date_str)
        with open(os.path.join(data_prefix, 'network_pickle', filename), 'rb') as fin:
            network_dict = pickle.load(fin)
        # keep only sources that list the target within the top NUM_REL positions
        for embed in network_dict:
            network_dict[embed] = [x[0] for x in network_dict[embed] if x[1] < NUM_REL]
        network_dict_list.append(network_dict)

    persistent_src_embed_set = set()
    persistent_tar_embed_set = set()
    existing_edges = set()
    num_reciprocal_edges = 0
    num_same_artist = 0
    num_same_genre = 0

    with open(os.path.join(data_prefix, 'persistent_network.csv'), 'w') as fout:
        fout.write('Source,Target\n')
        for tar_embed in range(num_videos):
            src_union_set = set()
            for t in range(T):
                src_union_set.update(network_dict_list[t][tar_embed])
            for src_embed in src_union_set:
                linkage_list = [0] * T
                for t in range(T):
                    if src_embed in network_dict_list[t][tar_embed]:
                        linkage_list[t] = 1
                if is_persistent_link(linkage_list):
                    # filter: at least 100 daily views for the target video,
                    # and the mean daily views of the source video are at least 1% of the target's
                    src_mean = embed_avg_view_dict[src_embed]
                    tar_mean = embed_avg_view_dict[tar_embed]
                    if tar_mean >= 100 and src_mean >= 0.01 * tar_mean:
                        fout.write('{0},{1}\n'.format(src_embed, tar_embed))
                        persistent_src_embed_set.add(src_embed)
                        persistent_tar_embed_set.add(tar_embed)
                        # the reverse edge was recorded earlier iff 'tar-src' is already present
                        if '{1}-{0}'.format(src_embed, tar_embed) in existing_edges:
                            num_reciprocal_edges += 1
                        if embed_cid_dict[src_embed] == embed_cid_dict[tar_embed]:
                            num_same_artist += 1
                        if is_same_genre(embed_genre_dict[src_embed], embed_genre_dict[tar_embed]):
                            num_same_genre += 1
                        existing_edges.add('{0}-{1}'.format(src_embed, tar_embed))

    print('{0} edges in the persistent network'.format(len(existing_edges)))
    print('{0} source videos, {1} target videos, {2} videos appear in both sets'.format(
        len(persistent_src_embed_set), len(persistent_tar_embed_set),
        len(persistent_src_embed_set.intersection(persistent_tar_embed_set))))
    print('{0} pairs of reciprocal edges'.format(num_reciprocal_edges))
    print('{0} ({1:.1f}%) edges belong to the same artist'.format(
        num_same_artist, 100 * num_same_artist / len(existing_edges)))
    print('{0} ({1:.1f}%) edges belong to the same genre'.format(
        num_same_genre, 100 * num_same_genre / len(existing_edges)))

    timer.stop()
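# -- Hedged sketches of the two predicates used when building the persistent
# network. The exact persistence rule is defined elsewhere in the repository;
# the threshold below is a hypothetical default for illustration only.
def is_persistent_link(linkage_list, threshold=1.0):
    """True if the link appears on at least `threshold` fraction of the T days."""
    return sum(linkage_list) >= threshold * len(linkage_list)


def is_same_genre(genres_a, genres_b):
    """True if the two videos share at least one genre label."""
    return len(set(genres_a).intersection(genres_b)) > 0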
def main():
    # == == == == == == Part 1: Set up environment == == == == == == #
    timer = Timer()
    timer.start()

    data_prefix = '../data'

    # == == == == == == Part 2: Load video views == == == == == == #
    data_loader = DataLoader()
    data_loader.load_video_views()
    embed_view_dict = data_loader.embed_view_dict
    num_videos = data_loader.num_videos
    total_views = []

    # == == == == == == Part 3: Load network snapshots as the cutoff value changes == == == == == == #
    for t in range(T):
        total_views.append(sum([embed_view_dict[embed][t] for embed in range(num_videos)]))

        snapshot_date = obj2str(datetime(2018, 9, 1) + timedelta(days=t))
        snapshot_filename = 'network_{0}.p'.format(snapshot_date)

        nodes_set = set()
        num_edges = 0
        embedded_graph = defaultdict(list)
        with open(os.path.join(data_prefix, 'network_pickle', snapshot_filename), 'rb') as fin:
            network_dict = pickle.load(fin)
            # embed_tar: [(embed_src, pos_src, view_src)]
            for embed_tar in range(num_videos):
                for embed_src, pos_src, _ in network_dict[embed_tar]:
                    if pos_src < CUTOFF:
                        embedded_graph[embed_src].append(embed_tar)
                        nodes_set.add(embed_src)
                        nodes_set.add(embed_tar)
                        num_edges += 1

        logging.info('>>> Graph embedding @ date {0} has been loaded!'.format(snapshot_date))
        logging.info('>>> {0} nodes and {1} edges in the graph'.format(len(nodes_set), num_edges))
        logging.info('    {0} views throughout the graph'.format(total_views[t]))

        # == == == == == == Part 4: Extract bow-tie structure == == == == == == #
        scc_content = tarjan(embedded_graph)
        scc_content = sorted(scc_content, key=len, reverse=True)

        # largest SCC
        largest_scc = scc_content.pop(0)
        logging.info('>>> {0} ({1:.2f}%) nodes in the largest SCC'.format(
            len(largest_scc), len(largest_scc) / num_videos * 100))
        largest_scc_views = sum([embed_view_dict[embed][t] for embed in largest_scc])
        logging.info('    {0} ({1:.2f}%) views in the largest SCC'.format(
            largest_scc_views, largest_scc_views / total_views[t] * 100))

        # find IN, OUT, Tendrils, and Disconnected
        in_component = []
        num_scc_in = 0
        to_visit_scc = []
        for scc in scc_content:
            if is_in_component(scc, embedded_graph, largest_scc):
                in_component.extend(scc)
                num_scc_in += 1
            else:
                to_visit_scc.append(scc)
        logging.info('>>> {0} ({1:.2f}%) nodes in the IN component'.format(
            len(in_component), len(in_component) / num_videos * 100))
        logging.info('    {0} scc in the IN component'.format(num_scc_in))
        in_views = sum([embed_view_dict[embed][t] for embed in in_component])
        logging.info('    {0} ({1:.2f}%) views in the IN component'.format(
            in_views, in_views / total_views[t] * 100))

        out_component = []
        num_scc_out = 0
        to_visit_scc2 = []
        for scc in to_visit_scc:
            if is_out_component(scc, embedded_graph, largest_scc):
                out_component.extend(scc)
                num_scc_out += 1
            else:
                to_visit_scc2.append(scc)
        logging.info('>>> {0} ({1:.2f}%) nodes in the OUT component'.format(
            len(out_component), len(out_component) / num_videos * 100))
        logging.info('    {0} scc in the OUT component'.format(num_scc_out))
        out_views = sum([embed_view_dict[embed][t] for embed in out_component])
        logging.info('    {0} ({1:.2f}%) views in the OUT component'.format(
            out_views, out_views / total_views[t] * 100))

        tendrils = []
        num_scc_tendrils = 0
        disconnected = []
        # isolated videos never enter the graph; each counts as its own SCC
        num_disconnected = num_videos - len(nodes_set)
        num_scc_disconnected = 0
        for scc in to_visit_scc2:
            # Tendrils either hang off the IN component or point into the OUT component
            if is_out_component(scc, embedded_graph, in_component):
                tendrils.extend(scc)
                num_scc_tendrils += 1
            elif is_in_component(scc, embedded_graph, out_component):
                tendrils.extend(scc)
                num_scc_tendrils += 1
            else:
                disconnected.extend(scc)
                num_scc_disconnected += 1
        logging.info('>>> {0} ({1:.2f}%) nodes in the Tendrils'.format(
            len(tendrils), len(tendrils) / num_videos * 100))
        logging.info('    {0} scc in the Tendrils'.format(num_scc_tendrils))
        tendrils_views = sum([embed_view_dict[embed][t] for embed in tendrils])
        logging.info('    {0} ({1:.2f}%) views in the Tendrils'.format(
            tendrils_views, tendrils_views / total_views[t] * 100))

        logging.info('>>> {0} ({1:.2f}%) nodes in the Disconnected'.format(
            num_disconnected + len(disconnected),
            (num_disconnected + len(disconnected)) / num_videos * 100))
        logging.info('    {0} scc in the Disconnected'.format(
            num_disconnected + num_scc_disconnected))
        disc_views = total_views[t] - largest_scc_views - in_views - out_views - tendrils_views
        logging.info('    {0} ({1:.2f}%) views in the Disconnected'.format(
            disc_views, disc_views / total_views[t] * 100))

        print('>>> Finish computing bowtie at day {0}...'.format(t + 1))

    timer.stop()
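# -- A hedged sketch of the reachability tests behind the bow-tie split; the
# repository's own is_in_component/is_out_component may differ in detail.
# `graph` is the forward adjacency dict (embed_src -> [embed_tar]); .get() is
# used so that iterating never inserts keys into the defaultdict.
def is_in_component(scc, graph, core):
    """True if some node in `scc` can reach the `core` node set."""
    core_set = set(core)
    seen, stack = set(scc), list(scc)
    while stack:
        node = stack.pop()
        for nbr in graph.get(node, []):
            if nbr in core_set:
                return True
            if nbr not in seen:
                seen.add(nbr)
                stack.append(nbr)
    return False


def is_out_component(scc, graph, core):
    """True if some node in `scc` is reachable from the `core` node set."""
    targets = set(scc)
    seen, stack = set(core), list(core)
    while stack:
        node = stack.pop()
        for nbr in graph.get(node, []):
            if nbr in targets:
                return True
            if nbr not in seen:
                seen.add(nbr)
                stack.append(nbr)
    return False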
def main():
    # == == == == == == Part 1: Set up environment == == == == == == #
    timer = Timer()
    timer.start()

    data_prefix = '../data/'
    year_labels = ["all years", "'09", "'10", "'11", "'12", "'13",
                   "'14", "'15", "'16", "'17", "'18"]
    num_year = len(year_labels) - 1

    # == == == == == == Part 2: Load video views == == == == == == #
    data_loader = DataLoader()
    data_loader.load_video_views()
    data_loader.load_embed_content_dict()
    embed_avg_view_dict = data_loader.embed_avg_view_dict
    embed_uploadtime_dict = data_loader.embed_uploadtime_dict
    num_videos = data_loader.num_videos

    # map each video to a year index; videos uploaded before 2009 are binned into '09
    for embed in range(num_videos):
        upload_year = int(embed_uploadtime_dict[embed][:4])
        year_idx = upload_year - 2009 if upload_year >= 2009 else 0
        embed_uploadtime_dict[embed] = year_idx

    views_by_years_list = [[] for _ in range(num_year)]
    indegrees_by_years_list = [[] for _ in range(num_year)]

    # == == == == == == Part 3: Load dynamic network snapshots == == == == == == #
    embed_indegree_dict_15 = {embed: np.zeros((T,)) for embed in np.arange(num_videos)}
    for t in range(T):
        filename = 'network_{0}.p'.format(obj2str(datetime(2018, 9, 1) + timedelta(days=t)))
        with open(os.path.join(data_prefix, 'network_pickle', filename), 'rb') as fin:
            network_dict = pickle.load(fin)
            # embed_tar: [(embed_src, pos_src, view_src)]
            for embed in range(num_videos):
                # daily in-degree, counting only links within the top NUM_REL_15 positions
                embed_indegree_dict_15[embed][t] = len(
                    [1 for x in network_dict[embed] if x[1] < NUM_REL_15])
        print('>>> Finish loading day {0}...'.format(t + 1))
    print('>>> Network structure has been loaded!')

    for embed in range(num_videos):
        views_by_years_list[embed_uploadtime_dict[embed]].append(embed_avg_view_dict[embed])
        indegrees_by_years_list[embed_uploadtime_dict[embed]].append(
            np.mean(embed_indegree_dict_15[embed]))

    spearman_traces = []
    all_views, all_indegrees = [], []
    for i in range(num_year):
        all_views.extend(views_by_years_list[i])
        all_indegrees.extend(indegrees_by_years_list[i])
    print('\n>>> {0}'.format(year_labels[0]), spearmanr(all_views, all_indegrees))
    spearman_traces.append(spearmanr(all_views, all_indegrees)[0])

    for i in range(num_year):
        spearman_traces.append(spearmanr(views_by_years_list[i], indegrees_by_years_list[i])[0])
        print('>>> {0} year'.format(year_labels[1 + i]),
              spearmanr(views_by_years_list[i], indegrees_by_years_list[i]))

    # == == == == == == Part 4: Plotting script == == == == == == #
    fig, ax1 = plt.subplots(1, 1, figsize=(8, 2))
    tomato = ColorPalette.TOMATO
    blue = ColorPalette.BLUE
    bar1 = ax1.bar(range(num_year + 1), spearman_traces,
                   edgecolor=['k'] * (num_year + 1),
                   color=[tomato] + [blue] * num_year, lw=1)
    # annotate each bar with its rho value
    for rect in bar1:
        height = rect.get_height()
        ax1.text(rect.get_x() + rect.get_width() / 2.0, height,
                 '{0:.3f}'.format(height), ha='center', va='bottom')
    ax1.set_xticks(np.arange(num_year + 1))
    ax1.set_xticklabels(year_labels)
    ax1.set_ylabel(r'spearman $\rho$')
    hide_spines(ax1)

    timer.stop()

    plt.tight_layout()
    plt.savefig('../images/measure_spearmanr.pdf', bbox_inches='tight')
    if not platform.system() == 'Linux':
        plt.show()
def main():
    # == == == == == == Part 1: Set up environment == == == == == == #
    timer = Timer()
    timer.start()

    data_prefix = '../data/'

    # == == == == == == Part 2: Load video views == == == == == == #
    data_loader = DataLoader()
    data_loader.load_video_views()
    embed_view_dict = data_loader.embed_view_dict
    embed_avg_view_dict = data_loader.embed_avg_view_dict

    # == == == == == == Part 3: Load persistent and non-persistent networks == == == == == == #
    reciprocal_link_set = set()
    persistent_link_set = set()
    non_persistent_link_set = set()

    with open(os.path.join(data_prefix, 'persistent_network.csv'), 'r') as fin:
        fin.readline()  # skip the 'Source,Target' header
        for line in fin:
            src_embed, tar_embed = map(int, line.rstrip().split(','))
            link = '{0}-{1}'.format(src_embed, tar_embed)
            rec_link = '{1}-{0}'.format(src_embed, tar_embed)
            # if the reverse edge is already persistent, record the pair as reciprocal
            if rec_link in persistent_link_set:
                persistent_link_set.remove(rec_link)
                reciprocal_link_set.add(link)
            else:
                persistent_link_set.add(link)

    for t in range(T):
        target_date_str = obj2str(datetime(2018, 9, 1) + timedelta(days=t))
        filename = 'network_{0}.p'.format(target_date_str)
        with open(os.path.join(data_prefix, 'network_pickle', filename), 'rb') as fin:
            network_dict = pickle.load(fin)
        for tar_embed in network_dict:
            src_embed_list = [x[0] for x in network_dict[tar_embed] if x[1] < NUM_REL]
            for src_embed in src_embed_list:
                # filter: at least 100 daily views for the target video,
                # and the mean daily views of the source video are at least 1% of the target's
                if embed_avg_view_dict[tar_embed] >= 100 \
                        and embed_avg_view_dict[src_embed] >= 0.01 * embed_avg_view_dict[tar_embed]:
                    link = '{0}-{1}'.format(src_embed, tar_embed)
                    rec_link = '{1}-{0}'.format(src_embed, tar_embed)
                    if link not in persistent_link_set and rec_link not in persistent_link_set \
                            and link not in reciprocal_link_set and rec_link not in reciprocal_link_set \
                            and link not in non_persistent_link_set and rec_link not in non_persistent_link_set:
                        non_persistent_link_set.add(link)

    print('>>> Number of reciprocal links: {0}'.format(len(reciprocal_link_set)))
    print('>>> Number of persistent links (non-reciprocal): {0}'.format(len(persistent_link_set)))
    print('>>> Number of ephemeral links: {0}'.format(len(non_persistent_link_set)))

    for link_set, log_filename in zip(
            [reciprocal_link_set, persistent_link_set, non_persistent_link_set],
            ['./reciprocal_pearsonr.log', './persistent_pearsonr.log', './ephemeral_pearsonr.log']):
        with open(log_filename, 'w') as log_file:
            for link in link_set:
                src_embed, tar_embed = map(int, link.split('-'))
                eff_size, pvalue = pearsonr(detsn(embed_view_dict[src_embed]),
                                            detsn(embed_view_dict[tar_embed]))
                log_file.write('{0},{1},{2},{3}\n'.format(src_embed, tar_embed, eff_size, pvalue))

    timer.stop()
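# -- `detsn` presumably de-seasonalises a daily view series before computing
# Pearson correlations, since the shared day-of-week cycle would otherwise
# inflate every correlation. A minimal sketch under that assumption; the
# repository's own transform may differ.
def detsn(ts_view):
    """Remove a multiplicative day-of-week pattern from a daily series."""
    ts = np.asarray(ts_view, dtype=float)
    weekday_avg = np.array([ts[d::7].mean() for d in range(7)])
    weekday_avg /= weekday_avg.mean()  # normalise the weekly profile to mean 1
    return ts / weekday_avg[np.arange(len(ts)) % 7]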