import re

import reusables

# file_text, parse_txt_file, and all_questions are defined elsewhere in the module.


def get_data():
    # Walk file_text line by line and tag each non-empty line:
    # '#' marks a topic, '.' marks a sub-question, anything else is a question.
    stack = []
    for line in file_text.split('\n'):
        line = line.strip()
        if line:
            if line.startswith('#'):
                stack.append(('topic', line[1:].lstrip()))
            elif line.startswith('.'):
                # Split an optional trailing http(s) link off the entry text.
                match = re.match(r'(.*)\s(https?.*)$', line[1:].lstrip())
                if match:
                    sub_question = {'sub_question_name': match.group(1),
                                    'link': match.group(2),
                                    'path': ''}
                else:
                    sub_question = {'sub_question_name': line[1:].lstrip(),
                                    'link': '',
                                    'path': ''}
                stack.append(('sub_question', sub_question))
            else:
                match = re.match(r'(.*)\s(https?.*)$', line.lstrip())
                if match:
                    question = {'question_name': match.group(1),
                                'link': match.group(2),
                                'path': ''}
                else:
                    question = {'question_name': line.lstrip(),
                                'link': '',
                                'path': ''}
                stack.append(('question', question))
    data = parse_txt_file(stack, all_questions)
    reusables.save_json(data, 'data.json', indent=2)
    return data
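For context, here is a hypothetical file_text illustrating the three line formats get_data recognizes; the conventions ('#' topic, '.' sub-question, optional trailing link) come from the parser above, while the actual entries are invented for illustration:

file_text = '''
# Arrays
Two Sum https://example.com/two-sum
. What if the input is already sorted?
Rotate a matrix
'''
# Yields one topic, one question with a link, one sub-question,
# and one question without a link.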
def test_json_save(self):
    test_data = {"Hello": ["how", "are"], "You": "?", "I'm": True, "fine": 5}
    afile = reusables.join_paths(test_root, "test.json")
    try:
        reusables.save_json(test_data, afile)
        out_data = reusables.load_json(afile)
    finally:
        # Clean up the temp file even if the round trip fails.
        try:
            os.unlink(afile)
        except OSError:
            pass
    assert out_data == test_data
def refresh_disk_data(self):
    # Re-read the database, upsert this movie's record, and write it back.
    disk_json = reusables.load_json('database.json')
    disk_json[self.movie_dat['id']] = self.movie_dat
    reusables.save_json(disk_json, 'database.json', indent=2)
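Note that reusables.load_json opens database.json directly, so refresh_disk_data fails on a first run when the file does not exist yet. A minimal defensive variant, assuming the same reusables API (the empty-dict fallback is an addition, not part of the original):

import os

import reusables


def refresh_disk_data(self):
    # Fall back to an empty database the first time, before the file exists.
    if os.path.exists('database.json'):
        disk_json = reusables.load_json('database.json')
    else:
        disk_json = {}
    disk_json[self.movie_dat['id']] = self.movie_dat
    reusables.save_json(disk_json, 'database.json', indent=2)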
        # Tail of Network.crawler; the earlier part of the method was not
        # included in this snippet.
        if len(self.last_page) != 0:
            self.last_ts = self.last_page[-1]["created_utc"]
        print(len(submissions))
        return submissions[:max_submissions]


n = Network()

# Inputs
subreddit_name = sys.argv[1]       # e.g. 'oneliners'
max_submission = int(sys.argv[2])  # e.g. 200; number of posts to fetch.
# Note: some posts may be deleted or otherwise unavailable.

data = n.crawler(subreddit_name, max_submissions=max_submission)

# Save the result as <subreddit_name>.json
reusables.save_json(data, '{}.json'.format(subreddit_name), indent=2)
print('\nDone, file created as {}.json'.format(subreddit_name))

'''
Some info about the JSON format:
full_link: link to the original reddit page
title:     title of the reddit post
selftext:  text body of the post
url:       may contain an image or video link if it ends with jpg, jpeg,
           png, or mp4; it may also contain an imgur or gfycat link
'''
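To show the saved format in use, here is a hypothetical consumer that reloads the crawler output and collects direct image links; the field names and extensions come from the format notes above, while the file name and the filtering choice are assumptions:

import reusables

# Assumes the crawler was run as: python crawl.py oneliners 200
posts = reusables.load_json('oneliners.json')

image_links = [
    post['url']
    for post in posts
    # Per the format notes, 'url' is a direct image when it ends in
    # jpg/jpeg/png (mp4 would be a video instead).
    if post.get('url', '').lower().endswith(('.jpg', '.jpeg', '.png'))
]
print('{} direct image links found'.format(len(image_links)))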