def runTest(self):
    '''Check the structure of the split returned by setup.sample.

    Calls setup.sample(100, 85, 'id', 'abstract', 'title') and verifies that
    the result has exactly two entries, that the 'train' and 'test' entries
    hold at most 85 and 15 items respectively, and that every item exposes
    each requested field among its keys.

    NOTE(review): 100 and 85 are presumably the total sample size and the
    training-set size -- confirm against setup.sample.
    '''
    total, train_size = 100, 85
    fields = ('id', 'abstract', 'title')

    sample = setup.sample(total, train_size, *fields)

    self.assertEqual(len(sample), 2)
    # assertLessEqual reports both operands on failure, whereas
    # assertTrue(a <= b) only reports "False is not true".
    self.assertLessEqual(len(sample['train']), train_size)
    self.assertLessEqual(len(sample['test']), total - train_size)

    # Both partitions must satisfy the same per-document field check.
    for partition in ('train', 'test'):
        for doc in sample[partition]:
            for field in fields:
                self.assertIn(field, doc.keys())
def runTest(self):
    '''Check the structure of the split returned by setup.sample.

    Calls setup.sample(100, 85, 'id', 'abstract', 'title') and verifies that
    the result has exactly two entries, that the 'train' and 'test' entries
    hold at most 85 and 15 items respectively, and that every item exposes
    each requested field among its keys.

    NOTE(review): 100 and 85 are presumably the total sample size and the
    training-set size -- confirm against setup.sample.
    '''
    total, train_size = 100, 85
    fields = ('id', 'abstract', 'title')

    sample = setup.sample(total, train_size, *fields)

    self.assertEqual(len(sample), 2)
    # assertLessEqual reports both operands on failure, whereas
    # assertTrue(a <= b) only reports "False is not true".
    self.assertLessEqual(len(sample['train']), train_size)
    self.assertLessEqual(len(sample['test']), total - train_size)

    # Both partitions must satisfy the same per-document field check.
    for partition in ('train', 'test'):
        for doc in sample[partition]:
            for field in fields:
                self.assertIn(field, doc.keys())
#!/usr/bin/python
'''
Script to gather sample articles from solr and write them to json
'''
import json
from datetime import datetime

import plos_classification.setup as setup


def _log(message):
    '''Print message prefixed with the current timestamp.'''
    # A single pre-formatted string prints identically under the Python 2
    # print statement and the Python 3 print function.
    print('%s %s' % (datetime.now(), message))


_log('gathering sample articles')
sample = setup.sample(15000, 10000, 'abstract', 'title',
                      'subject2_hierarchy', 'cross_published_journal_key')

_log('writing sample to file')
# The context manager closes (and therefore flushes) the output file even
# if json.dump raises; the original left the handle open.
with open('data/sample.json', 'w') as out:
    json.dump(sample, out, indent=2)

_log('finished')
#!/usr/bin/python
'''
Script to gather sample articles from solr and write them to json
'''
import json
from datetime import datetime

import plos_classification.setup as setup


def _log(message):
    '''Print message prefixed with the current timestamp.'''
    # A single pre-formatted string prints identically under the Python 2
    # print statement and the Python 3 print function.
    print('%s %s' % (datetime.now(), message))


_log('gathering sample articles')
sample = setup.sample(15000, 10000, 'abstract', 'title',
                      'subject2_hierarchy', 'cross_published_journal_key')

_log('writing sample to file')
# The context manager closes (and therefore flushes) the output file even
# if json.dump raises; the original left the handle open.
with open('data/sample.json', 'w') as out:
    json.dump(sample, out, indent=2)

_log('finished')