예제 #1
0
def _split_name_and_link(text, name_key):
    """Split ``text`` into a name and an optional trailing URL.

    Returns a dict with ``name_key`` (the text before the URL, or the whole
    text when no URL is present), ``'link'`` (the URL or ``''``) and an
    empty ``'path'``.
    """
    # Require a real scheme ("http://" or "https://"). The previous pattern
    # `http?.*` made the "p" optional, so any trailing word that merely began
    # with "htt" (e.g. "httpd") was split off as a link.
    match = re.match(r'(.*)\s(https?://.*)$', text)
    if match:
        name, link = match.group(1), match.group(2)
    else:
        name, link = text, ''
    return {name_key: name, 'link': link, 'path': ''}


def get_data():
    """Parse the module-level ``file_text`` and save the result as JSON.

    Each non-blank line is classified by its first character:

    * ``#`` -> ``('topic', <text after the marker>)``
    * ``.`` -> ``('sub_question', {...})``
    * anything else -> ``('question', {...})``

    For questions and sub-questions, a trailing ``http://`` / ``https://``
    URL is stored under ``'link'`` and the rest of the line under the name
    key. The collected entries are handed to ``parse_txt_file`` together
    with ``all_questions``; the result is written to ``data.json`` and
    returned.
    """
    stack = []
    for raw_line in file_text.split('\n'):
        line = raw_line.strip()
        if not line:
            continue

        if line.startswith('#'):
            stack.append(('topic', line[1:].lstrip()))
        elif line.startswith('.'):
            entry = _split_name_and_link(line[1:].lstrip(), 'sub_question_name')
            stack.append(('sub_question', entry))
        else:
            entry = _split_name_and_link(line, 'question_name')
            stack.append(('question', entry))

    # Named `parsed` rather than `json` so the stdlib module name is not shadowed.
    parsed = parse_txt_file(stack, all_questions)
    reusables.save_json(parsed, 'data.json', indent=2)
    return parsed
예제 #2
0
    def test_json_save(self):
        """Save a dict with save_json, reload it with load_json, and compare."""
        expected = {
            "Hello": ["how", "are"],
            "You": "?",
            "I'm": True,
            "fine": 5,
        }
        json_path = reusables.join_paths(test_root, "test.json")
        try:
            reusables.save_json(expected, json_path)
            loaded = reusables.load_json(json_path)
        finally:
            # Best-effort cleanup: the file may not exist if the save failed.
            try:
                os.remove(json_path)
            except OSError:
                pass

        assert loaded == expected
예제 #3
0
    def test_json_save(self):
        """Check that data written by save_json is read back unchanged by load_json."""

        def _discard(path):
            # Remove the scratch file, ignoring the case where it cannot be deleted.
            try:
                os.unlink(path)
            except OSError:
                pass

        original = {"Hello": ["how", "are"], "You": "?", "I'm": True, "fine": 5}
        target = reusables.join_paths(test_root, "test.json")
        try:
            reusables.save_json(original, target)
            round_tripped = reusables.load_json(target)
        finally:
            _discard(target)

        assert round_tripped == original
예제 #4
0
 def refresh_disk_data(self):
     """Store ``self.movie_dat`` in database.json under its ``'id'`` key.

     Loads the existing JSON file, inserts or overwrites this entry, and
     writes the whole mapping back with a two-space indent.
     """
     db_path = 'database.json'
     records = reusables.load_json(db_path)
     movie = self.movie_dat
     records[movie['id']] = movie
     reusables.save_json(records, db_path, indent=2)
예제 #5
0
            if len(self.last_page) != 0:
                self.last_ts = self.last_page[-1]["created_utc"]
            print(len(submissions))
            # time.sleep(1)

        return submissions[:max_submissions]


n = Network()

# Command-line inputs: <subreddit_name> <max_submission>
subreddit_name = sys.argv[1]  # e.g. 'oneliners'
max_submission = int(sys.argv[2])  # e.g. 200; number of posts to fetch (some posts may be deleted or unavailable)

# Crawl the subreddit, asking for at most `max_submission` submissions.
data = n.crawler(subreddit_name, max_submissions=max_submission)

# Save the crawled data to <subreddit_name>.json
reusables.save_json(data, '{}.json'.format(subreddit_name), indent=2)
print('\nDone, file created as {}.json'.format(subreddit_name))

'''
Notes on the JSON format

full_link: the link to the original Reddit page
title: the title of the Reddit page
selftext: the text of the post
url: may contain an image or video link (if it ends with jpg, jpeg, png, or mp4), or an imgur or gfycat link


'''