def get_pubmed_data(self, max=None, pmids=None):
    # Use the caller-supplied PMIDs if given; otherwise pull every PMID
    # from the Ochsner data source, truncated to the first `max` entries.
    if pmids:
        query_pmids = pmids
    else:
        all_pmids = datasources.ochsner.get_all_pmids()
        if not max:
            max = len(all_pmids)
        query_pmids = all_pmids[0:max]

    pubmed_request = """pubmed.pubmed_id,
        ochsner.found_geo_data_submission,
        ochsner.found_arrayexpress_data_submission,
        ochsner.found_smd_data_submission,
        ochsner.found_any_data_submission,
        ochsner.list_data_ids,
        pubmed.is_humans,
        pubmed.is_animals,
        pubmed.is_mice,
        pubmed.is_fungi,
        pubmed.is_bacteria,
        pubmed.is_plants,
        pubmed.is_viruses,
        pubmed.is_cultured_cells,
        pubmed.is_cancer,
        pubmed.date_published,
        pubmed.authors,
        pubmed.journal,
        pubmed.number_times_cited_in_pmc,
        isi.impact_factor
        """

    data = dataset.collect_data(query_pmids, pubmed_request)
    csv_data = dataset.csv_format(data)

    # Debugging aid: flip to True to dump the formatted rows to disk.
    if False:
        FILENAME = "../../results/aim2b_part2_pubmed.txt"
        writer = open(FILENAME, "w")
        for row in csv_data:
            writer.write("\t".join([str(col) for col in row]))
            writer.write("\r\n")
        writer.close()

    # csv_format prepends a header row, hence the extra 1.
    assert_equals(len(csv_data), 1 + len(query_pmids))
    return csv_data
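# Hedged usage sketch, not from the original file: how get_pubmed_data
# might be exercised. `TestPubmed` is an assumed name for the class these
# methods belong to, and the PMID count is illustrative only.
def _example_get_pubmed_data_usage():
    suite = TestPubmed()                      # hypothetical owning class
    csv_data = suite.get_pubmed_data(max=5)   # first 5 Ochsner PMIDs
    header, rows = csv_data[0], csv_data[1:]  # csv_format prepends a header
    assert len(rows) == 5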
def write_to_file(self, data, filename):
    output_file = open(filename, "w")
    csv_data = dataset.csv_format(data)
    for row in csv_data:
        for col in row:
            # Normalize missing values and booleans for tab-delimited output.
            if col is None:
                col = u""
            if col == True:
                col = u"1"
            if col == False:
                col = u"0"
            try:
                output_file.write(str(col))
            except UnicodeEncodeError:
                # Fall back to a cleaned-up version of strings that the
                # default codec cannot encode.
                new_string = utils.unicode.clean_up_strange_unicode(col)
                output_file.write(new_string)
            output_file.write(u"\t")
        output_file.write(u"\r\n")
    output_file.close()
    return csv_data
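# Hedged usage sketch, not from the original file: writing a small slice of
# collected data to a tab-delimited file. The PMIDs, field list, and output
# path are made up for illustration; `TestPubmed` is an assumed class name.
def _example_write_to_file_usage():
    suite = TestPubmed()                          # hypothetical owning class
    pmids = ["12345678", "23456789"]              # illustrative PMIDs
    data = dataset.collect_data(pmids, "pubmed.pubmed_id, pubmed.journal")
    # Writes a header row plus one row per PMID; None becomes "" and
    # booleans become "1"/"0" in the output.
    suite.write_to_file(data, "example_pubmed.txt")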
def test_csv_format(self):
    input_data = test_data_has_geo_data_submission
    response = dataset.csv_format(input_data)
    assert_equals(response, test_data_csv_format)