Example #1
0
	new_graph = {k:v for (k, v) in graph.iteritems() if k != 'children'}
	new_graph['children'] = []
	for child in graph['children']:
		(total_children, total_children_score) = get_total_children_score(child)
		(immediate_children, immediate_children_score) = get_immediate_children_score(child)
		new_child = {k:v for k in child if k != 'children'}
		new_child['total_children'] = total_children
		new_child['total_children_score'] = total_children_score
		new_child['immediate_children'] = immediate_children
		new_child['immediate_children_score'] = immediate_children_score
		new_graph['children'].append(new_child)
	return new_graph


if __name__ == '__main__':
	# Script entry point: walk every per-post comment file and build a comment
	# graph for each distinct link_id found in it.
	# NOTE(review): this block appears to be cut off in this view -- `singles`,
	# `convos` and `convo_graph` are populated/assigned below but never consumed
	# in the visible lines.
	# presumably remake_folder (re)creates the named output directory -- confirm
	# against its definition.
	remake_folder('singles')
	remake_folder('convos')
	# get_post_groups is iterated as (subdir, sub) pairs: `subdir` is used as a
	# directory path to list, `sub` as the name of the per-group output folder.
	subs = get_post_groups('../comments_by_posts')
	for (subdir, sub) in subs:
		remake_folder('singles/' + sub)
		link_groups = os.listdir(subdir)
		for link_group in link_groups:
			with open(subdir + '/' + link_group, 'r') as f:
				# Each line of the file is parsed as one standalone JSON document.
				comments = [json.loads(line) for line in f.readlines()]
				# itertools.groupby only merges *adjacent* items with equal keys,
				# so the comments are sorted by the same key first.
				comments = sorted(comments, key=lambda comment: comment['link_id'])
				singles = []
				convos = []
				for link, g in itertools.groupby(comments, key=lambda comment: comment['link_id']):
					# `g` is a one-shot iterator over the comments sharing this link_id.
					graph = create_graph(g)
					singles.append(make_singles(graph))
					convo_graph = make_convos(graph)
Example #2
0
        self.df = pd.DataFrame(index=filenames, columns=["done"])
        self.df.done = False
        if os.path.exists(self.fname):
            saved_df = pd.read_csv(self.fname)
            saved_df.columns = ["fnames", "done"]
            self.df.ix[saved_df[saved_df.done == True].fnames, "done"] = True

    def completed(self, filename):
        """Mark ``filename`` as done and persist the status table.

        Sets the ``done`` cell of the row labelled ``filename`` in ``self.df``
        to ``True`` and writes the whole frame to ``self.fname`` as CSV.

        Args:
            filename: Row label (an entry of the frame's index) to mark done.

        Raises:
            Whatever ``DataFrame.to_csv`` raises if the write fails twice; an
            interruption of the first write is re-raised after the retry.
        """
        # ``.loc`` is the label-based indexer; ``.ix`` was removed in pandas 1.0.
        self.df.loc[filename, "done"] = True
        try:
            self.df.to_csv(self.fname)
        except BaseException:
            # The original repeated the write in a ``finally`` clause,
            # presumably so that an interrupted write (e.g. Ctrl-C) does not
            # leave a truncated status file. Keep that retry on the failure
            # path only, then let the original error propagate.
            self.df.to_csv(self.fname)
            raise


if __name__ == "__main__":
    # Script entry point: split every file found under ../sub_files into
    # chunks, one output folder per input file, and record each finished file
    # in the StatusDF table so it is not processed again on a later run.
    sub_filenames = sorted(get_sub_files("../sub_files"))
    df = StatusDF(sub_filenames)
    for sub_filename in sub_filenames:
        # ``.loc`` is the label-based indexer; ``.ix`` was removed in pandas 1.0.
        if df.df.loc[sub_filename, "done"]:
            # Single-argument parenthesised print emits the same text on
            # Python 2 and Python 3.
            print("%s ALREADY COMPLETED" % sub_filename)
            continue

        print("%s" % sub_filename)
        # The output folder is named after the last path component.
        sub_name = sub_filename.split("/")[-1]
        remake_folder(sub_name)
        with open(sub_filename, "r") as sub_file:
            # Chunks are numbered from 1 when saved.
            for i, lines in enumerate(get_chunks_of_file(sub_file, True)):
                save_lines(lines, sub_name + "/", i + 1)
        # Only mark the file done once every chunk has been written.
        df.completed(sub_filename)