def recognize_files(connection, accession_file_map, new_folder):
    """
    Ask the ``genestack/upload`` application to recognize uploaded raw files,
    create the recognized files and report the outcome on stdout.

    Raw files that were not recognized are linked into a new
    "Unrecognized files" folder created inside ``new_folder`` and are
    unlinked from ``new_folder`` itself.

    :param connection: connection used to build ``FilesUtil`` and to reach
        the ``genestack/upload`` application
    :param dict accession_file_map: raw file accession -> source file name
        (text, or UTF-8 encoded bytes)
    :param new_folder: folder the unrecognized raw files are unlinked from;
        also the parent of the "Unrecognized files" folder
    :return: None
    """
    # Files Recognition
    fu = FilesUtil(connection)

    application = connection.application('genestack/upload')
    # Pass a real list: on Python 3 ``dict.keys()`` is a view object, which is
    # not a plain sequence any more (on Python 2 this is the same list as before).
    recognised_files = application.invoke('recognizeGroupsByAccession',
                                          list(accession_file_map))

    recognized_accessions = set()
    for recognised in recognised_files:
        for sources in recognised['sourceFileInfos'].values():
            for info in sources:
                recognized_accessions.add(info['accession'])

    created_files = application.invoke('createFiles', recognised_files, [], None)
    # groupby() only merges adjacent items, hence the sort on the same key.
    groups = sorted(created_files['files'], key=itemgetter('kind'))
    for name, group in groupby(groups, key=itemgetter('kind')):
        print(name)
        # maybe sort by filename before printing a group?
        for f in group:
            print('\t%s / %s' % (f['accession'], f['name']))

    unrecognized_file_infos = set(accession_file_map) - recognized_accessions
    if not unrecognized_file_infos:
        return

    print('Unrecognized Raw Files')
    for accession in unrecognized_file_infos:
        file_name = accession_file_map[accession]
        # Only byte strings need decoding; text (Python 3 ``str`` or
        # Python 2 ``unicode``) is printed as is.
        if isinstance(file_name, bytes):
            file_name = file_name.decode('utf-8')
        print('\t%s / %s' % (accession, file_name))

    # move unrecognized files to new folder
    unrecognized_folder = fu.create_folder("Unrecognized files", parent=new_folder)
    for accession in unrecognized_file_infos:
        fu.link_file(accession, unrecognized_folder)
        fu.unlink_file(accession, new_folder)
    print('Unrecognized files moved to %s / %s' % (unrecognized_folder, 'Unrecognized files'))
def upload_files(connection, files, folder_name, folder_accession):
    """
    Load raw files and put them into a target folder.

    If ``folder_accession`` is empty, a new folder is created inside the
    special "uploaded" folder, named ``folder_name`` or, when that is empty
    too, after the current local time. Otherwise the given folder is used
    and its name is fetched with ``get_infos``.

    :param genestack_client.Connection connection:
    :param list[str] files:
    :param str folder_name:
    :param str folder_accession:
    :return: tuple ``(folder_accession, folder_name, accession_file_map)``
        where the map goes from raw file accession to the source file
    """
    raw_importer = DataImporter(connection)
    files_util = FilesUtil(connection)
    uploaded_folder = files_util.get_special_folder(SpecialFolders.UPLOADED)

    if folder_accession:
        # Existing folder: its real name replaces whatever was passed in.
        folder_name = files_util.get_infos([folder_accession])[0]['name']
    else:
        if not folder_name:
            folder_name = datetime.now().strftime('Upload %d.%m.%y %H:%M:%S')
        folder_accession = files_util.create_folder(
            folder_name,
            parent=uploaded_folder,
            description='Files uploaded by genestack-uploader')

    accession_file_map = {}
    for source_file in files:
        raw_accession = raw_importer.load_raw(source_file)
        # Move the raw file: link it to the target, then drop it from "uploaded".
        files_util.link_file(raw_accession, folder_accession)
        files_util.unlink_file(raw_accession, uploaded_folder)
        accession_file_map[raw_accession] = source_file

    return folder_accession, folder_name, accession_file_map
def test_metainfo_io(conn):
    """
    Create a report file carrying one metainfo value of each kind, read the
    metainfo back with ``collect_metainfos`` and check every value.

    The report file is unlinked from the "created" folder afterwards, even
    when an assertion fails.
    """
    importer = DataImporter(conn)
    files_util = FilesUtil(conn)
    created_folder = files_util.get_special_folder(SpecialFolders.CREATED)

    source_info = Metainfo()
    source_info.add_boolean("a", True)
    source_info.add_file_reference("b", created_folder)
    source_info.add_date_time("c", "2015-12-13")
    source_info.add_integer("d", 239)
    # Key "e" deliberately gets two values.
    source_info.add_decimal("e", 238.583)
    source_info.add_decimal("e", -13.4)
    source_info.add_string("f", "hello")
    source_info.add_memory_size("g", 2847633)
    source_info.add_person("i", "Rosalind Franklin", "+1-202-555-0123", "*****@*****.**")
    source_info.add_publication("j", "My Publication", "Myself", "Journal of Me",
                                "23/12/2014", pages="12-23")
    source_info.add_value(Metainfo.NAME, StringValue("Test report file"))

    expected_person = {
        'name': 'Rosalind Franklin',
        'phone': '+1-202-555-0123',
        'email': '*****@*****.**'
    }
    expected_publication = {
        'title': 'My Publication',
        'authors': 'Myself',
        'journalName': 'Journal of Me',
        'issueDate': '23/12/2014',
        'pages': '12-23',
        'issueNumber': None,
        'identifiers': {}
    }

    report_file = None
    try:
        report_file = importer.create_report_file(metainfo=source_info,
                                                  urls=[TEST_URL],
                                                  parent=created_folder)
        fetched = next(iter(files_util.collect_metainfos([report_file])))

        def value_of(key, position=0):
            # Look the key up afresh on every call and pick one of its values.
            return fetched.get(key)[position]

        assert value_of('a').get_boolean()
        assert isinstance(value_of('b').get_accession(), str)
        assert value_of('c').get_date() == _strptime_local('2015-12-13', '%Y-%m-%d')
        assert value_of('d').get_int() == 239
        assert value_of('e', 0).get_decimal() == 238.583
        assert value_of('e', 1).get_decimal() == -13.4
        assert value_of('f').get_string() == "hello"
        assert value_of('g').get_int() == 2847633
        assert value_of('i').get_person() == expected_person
        assert value_of('j').get_publication() == expected_publication
        assert value_of(Metainfo.NAME).get_string() == "Test report file"
        assert value_of(BioMetaKeys.DATA_LINK).get_url() == TEST_URL
    finally:
        if report_file is not None:
            files_util.unlink_file(report_file, created_folder)
def test_en_isoforms(conn, keep_files):
    """
    Create an isoform Expression Navigator file from ``ISOFORM_GROUPS``.

    The created file is unlinked from the "created" folder afterwards unless
    ``keep_files`` is set.
    """
    files_util = FilesUtil(conn)
    navigator = ExpressionNavigatorforIsoforms(conn)
    result_file = None
    try:
        group_specs = []
        for accessions in ISOFORM_GROUPS:
            group_specs.append({'accessions': accessions})
        result_file = navigator.create_file(group_specs, multi_mapping_corr=True)
    finally:
        # Nothing to clean up when files are kept or creation never succeeded.
        if not keep_files:
            if result_file is not None:
                created_folder = files_util.get_special_folder(SpecialFolders.CREATED)
                files_util.unlink_file(result_file, created_folder)
def test_en_rna_seq(conn, keep_files):
    """
    Create a gene-level Expression Navigator file from ``RNA_SEQ_GROUPS``
    using the ``PKG_DESEQ`` package option.

    The created file is unlinked from the "created" folder afterwards unless
    ``keep_files`` is set.
    """
    files_util = FilesUtil(conn)
    navigator = ExpressionNavigatorforGenes(conn)
    created_file = None
    try:
        sample_groups = [{'accessions': members} for members in RNA_SEQ_GROUPS]
        created_file = navigator.create_file(
            sample_groups,
            r_package=navigator.PKG_DESEQ,
            organism="new organism")
    finally:
        cleanup_wanted = not keep_files
        if cleanup_wanted and created_file is not None:
            created_folder = files_util.get_special_folder(SpecialFolders.CREATED)
            files_util.unlink_file(created_file, created_folder)
def upload_files(connection, files, folder_name):
    """
    Load raw files into a freshly created folder under the special
    "uploaded" folder.

    :param connection: connection handed to ``DataImporter`` and ``FilesUtil``
    :param files: items passed one by one to ``DataImporter.load_raw``
    :param folder_name: name of the new folder; when empty, a name built from
        the current local time is used
    :return: tuple ``(new_folder, folder_name, accession_file_map)`` where the
        map goes from raw file accession to the source file
    """
    raw_importer = DataImporter(connection)
    files_util = FilesUtil(connection)
    uploaded_root = files_util.get_special_folder(SpecialFolders.UPLOADED)

    if not folder_name:
        folder_name = datetime.now().strftime('Upload %d.%m.%y %H:%M:%S')
    target_folder = files_util.create_folder(
        folder_name,
        parent=uploaded_root,
        description='Files uploaded by genestack-uploader')

    accession_file_map = {}
    for source_file in files:
        raw_accession = raw_importer.load_raw(source_file)
        # Move the raw file from "uploaded" into the new folder.
        files_util.link_file(raw_accession, target_folder)
        files_util.unlink_file(raw_accession, uploaded_root)
        accession_file_map[raw_accession] = source_file

    return target_folder, folder_name, accession_file_map
def test_en_microarrays(conn, keep_files):
    """
    Normalize all microarrays from ``MICROARRAY_GROUPS`` and create a
    microarray Expression Navigator file on top of the normalized file.

    The first group is flagged as the control group. Both created files are
    unlinked from the "created" folder afterwards unless ``keep_files`` is set.
    """
    files_util = FilesUtil(conn)
    navigator = ExpressionNavigatorforMicroarrays(conn)
    normalizer = AffymetrixMicroarraysNormalizationApplication(conn)
    navigator_file = None
    normalized_file = None
    try:
        group_specs = []
        for accessions in MICROARRAY_GROUPS:
            group_specs.append({'accessions': accessions})
        group_specs[0]['is_control'] = True

        # The normalization application takes one flat list of all microarrays.
        all_microarrays = []
        for accessions in MICROARRAY_GROUPS:
            for microarray in accessions:
                all_microarrays.append(microarray)
        normalized_file = normalizer.create_file(all_microarrays)

        navigator_file = navigator.create_file(group_specs, normalized_file,
                                               RAT_AFFY_ANNOTATION)
    finally:
        if not keep_files:
            created_folder = files_util.get_special_folder(SpecialFolders.CREATED)
            # Either file may be missing if its creation step failed.
            leftovers = [f for f in (normalized_file, navigator_file) if f is not None]
            for leftover in leftovers:
                files_util.unlink_file(leftover, created_folder)
def test_metainfo_io(conn):
    """
    Check that metainfo values of every kind survive a round trip: they are
    attached to a new report file and read back with ``collect_metainfos``.

    The report file is always unlinked from the "created" folder at the end.
    """
    report_importer = DataImporter(conn)
    util = FilesUtil(conn)
    created_dir = util.get_special_folder(SpecialFolders.CREATED)

    sent = Metainfo()
    sent.add_boolean("a", True)
    sent.add_file_reference("b", created_dir)
    sent.add_date_time("c", "2015-12-13")
    sent.add_integer("d", 239)
    # Two decimals under the same key: both must come back, in this order.
    sent.add_decimal("e", 238.583)
    sent.add_decimal("e", -13.4)
    sent.add_string("f", "hello")
    sent.add_memory_size("g", 2847633)
    sent.add_person("i", "Rosalind Franklin", "+1-202-555-0123", "*****@*****.**")
    sent.add_publication("j", "My Publication", "Myself", "Journal of Me", "23/12/2014",
                         pages="12-23")
    sent.add_value(Metainfo.NAME, StringValue("Test report file"))

    report_file = None
    try:
        report_file = report_importer.create_report_file(
            metainfo=sent, urls=[TEST_URL], parent=created_dir)
        received = next(iter(util.collect_metainfos([report_file])))

        assert received.get('a')[0].get_boolean()

        referenced = received.get('b')[0].get_accession()
        assert isinstance(referenced, str)

        stored_date = received.get('c')[0].get_date()
        assert stored_date == _strptime_local('2015-12-13', '%Y-%m-%d')

        assert received.get('d')[0].get_int() == 239
        assert received.get('e')[0].get_decimal() == 238.583
        assert received.get('e')[1].get_decimal() == -13.4
        assert received.get('f')[0].get_string() == "hello"
        assert received.get('g')[0].get_int() == 2847633

        person = received.get('i')[0].get_person()
        assert person == {'name': 'Rosalind Franklin',
                          'phone': '+1-202-555-0123',
                          'email': '*****@*****.**'}

        publication = received.get('j')[0].get_publication()
        assert publication == {'title': 'My Publication',
                               'authors': 'Myself',
                               'journalName': 'Journal of Me',
                               'issueDate': '23/12/2014',
                               'pages': '12-23',
                               'issueNumber': None,
                               'identifiers': {}}

        assert received.get(Metainfo.NAME)[0].get_string() == "Test report file"
        assert received.get(BioMetaKeys.DATA_LINK)[0].get_url() == TEST_URL
    finally:
        if report_file is not None:
            util.unlink_file(report_file, created_dir)
# Group the files of a folder into one sub-folder per creating application.
# NOTE(review): files, files_count, files_util, source_folder and move_files
# are not defined in this fragment; they are expected to be set up earlier
# in the script -- confirm.
# print() is called with a single pre-formatted string so the output is the
# same on Python 2 and Python 3.
print("Found %d files to organise. Retrieving infos..." % files_count)
infos = files_util.get_complete_infos(files)

output_folder = files_util.create_folder("Organized files", parent=source_folder)
grouping_folders = {}  # application label -> folder created for that application

for i, entry in enumerate(infos, 1):
    accession = entry['accession']
    print("Processing file %d of %d (%s)..." % (i, files_count, accession))

    # use either application name, application ID or "Unknown application" (in this order of preference)
    # Chaining with ``or`` also covers an 'id' key that is present but empty.
    app_entry = entry.get('application')
    if app_entry:
        application = (app_entry.get('name')
                       or app_entry.get('id')
                       or "Unknown application")
    else:
        application = "Unknown application"

    # if there is a folder for this group, we add the file to it ;
    # otherwise, we create one, add it to our dictionary of folders and add the file to it
    if application not in grouping_folders:
        new_folder = files_util.create_folder("Files for %s" % application,
                                              parent=output_folder)
        grouping_folders[application] = new_folder
    files_util.link_file(accession, grouping_folders[application])

    # When moving (rather than copying), drop the file from its original folder.
    if move_files:
        files_util.unlink_file(accession, source_folder)

print("All done! Your files can be found inside the folder with accession %s" % output_folder)
# Group the files of a folder into one sub-folder per creating application.
# NOTE(review): files_util, source_folder and move_files are not defined in
# this fragment; they are expected to be set up earlier in the script -- confirm.
# print() is called with a single pre-formatted string so the output is the
# same on Python 2 and Python 3.
print("Collecting files...")
files = files_util.get_file_children(source_folder)
files_count = len(files)
print("Found %d files to organise. Retrieving infos..." % files_count)
infos = files_util.get_complete_infos(files)

output_folder = files_util.create_folder("Organized files", parent=source_folder)
grouping_folders = {}  # application label -> folder created for that application

for i, entry in enumerate(infos, 1):
    accession = entry['accession']
    print("Processing file %d of %d (%s)..." % (i, files_count, accession))

    # use either application name, application ID or "Unknown application" (in this order of preference)
    # Chaining with ``or`` also covers an 'id' key that is present but empty.
    app_entry = entry.get('application')
    if app_entry:
        application = (app_entry.get('name')
                       or app_entry.get('id')
                       or "Unknown application")
    else:
        application = "Unknown application"

    # if there is a folder for this group, we add the file to it ;
    # otherwise, we create one, add it to our dictionary of folders and add the file to it
    if application not in grouping_folders:
        new_folder = files_util.create_folder("Files for %s" % application,
                                              parent=output_folder)
        grouping_folders[application] = new_folder
    files_util.link_file(accession, grouping_folders[application])

    # When moving (rather than copying), drop the file from its original folder.
    if move_files:
        files_util.unlink_file(accession, source_folder)

print("All done! Your files can be found inside the folder with accession %s" % output_folder)