def test_load(self):
    """Parse the DBLP fixture page and check the count and first publication.

    Reads the file at ``self.test_file``, passes its contents to
    ``gs.parse_dblp_page`` and asserts that 31 publications come back, with
    the expected title and ``pub_type`` on the first one.
    """
    # Read through a context manager so the fixture handle is closed
    # deterministically instead of being left for the garbage collector.
    with open(self.test_file) as fixture:
        page = fixture.read()

    # The second element of the returned pair is not checked by this test.
    pubs, _ = gs.parse_dblp_page(page)

    self.assertEqual(len(pubs), 31)
    self.assertEqual(
        pubs[0]['title'],
        'Detecting Change Points in the Large-Scale Structure of Evolving Networks.'
    )
    self.assertEqual(pubs[0]['pub_type'], 'inproceedings')
def test_load(self):
    """Check that the DBLP fixture page parses into the expected publications.

    The contents of ``self.test_file`` are handed to ``gs.parse_dblp_page``;
    the test then asserts the number of publications (31) and the title and
    ``pub_type`` of the first entry.
    """
    # Use a context manager so the fixture file is closed as soon as it has
    # been read, rather than leaking the handle until garbage collection.
    with open(self.test_file) as fixture:
        page = fixture.read()

    # Only the publication list is inspected; the second return value is unused.
    pubs, _ = gs.parse_dblp_page(page)

    self.assertEqual(len(pubs), 31)
    self.assertEqual(
        pubs[0]["title"],
        "Detecting Change Points in the Large-Scale Structure of Evolving Networks.",
    )
    self.assertEqual(pubs[0]["pub_type"], "inproceedings")
# Convert the cached GS and DBLP pages for one faculty record `f` into pickles.
# NOTE(review): `f`, `args`, `num_loaded` and `num_processed` are set up before
# this excerpt (presumably by an enclosing per-faculty loop) -- confirm, and
# re-indent these statements to match that scope.

# GS pages are numbered files; parse them in order until the next numbered
# file does not exist.
gs_file = os.path.join(args.gs_dir, GS_FILE % (f['gs'], num_loaded))
all_pubs = []
stats = None  # stays None when no page file exists at all
while os.path.isfile(gs_file):
    # Close every page as soon as it is read instead of leaking one handle
    # per iteration.
    with open(gs_file) as page:
        pubs, stats = parse_gs_page(page.read())
    all_pubs += pubs
    num_loaded += 1
    gs_file = os.path.join(args.gs_dir, GS_FILE % (f['gs'], num_loaded))

# Two objects are pickled back to back: the accumulated publications, then the
# stats returned for the last parsed page. Readers must load them in that order.
output_file = os.path.join(args.gs_dir, GS_PKL % f['gs'])
with open(output_file, 'wb') as fp:
    pickle.dump(all_pubs, fp)
    pickle.dump(stats, fp)

if 'dblp' in f and args.dblp_dir is not None:
    # Only page 0 of the DBLP cache is read.
    dblp_file = os.path.join(args.dblp_dir, DBLP_FILE % (f['dblp'], 0))
    with open(dblp_file) as page:
        all_pubs, stats = parse_dblp_page(page.read())
    for pub in all_pubs:
        # Item access, consistent with every other field lookup on `f` here
        # (attribute access fails when `f` is a plain mapping).
        pub['author_role'] = get_author_role(f['facultyName'], pub['authors'])
    output_file = os.path.join(args.dblp_dir, DBLP_PKL % f['dblp'])
    with open(output_file, 'wb') as fp:
        pickle.dump(all_pubs, fp)
        pickle.dump(stats, fp)

# Progress line. A single pre-formatted string prints the same text under both
# Python 2 and Python 3, unlike the `print a, b` statement form.
print('%s %s' % (num_processed, f['facultyName']))
num_processed += 1