def recognize_files(connection, accession_file_map, new_folder):
    """
    Recognize raw files, create files from them and report the outcome.

    Invokes ``recognizeGroupsByAccession`` and then ``createFiles`` on the
    ``genestack/upload`` application, prints the created files grouped by
    their ``kind``, and finally moves every raw file that did not appear in
    any recognized group from ``new_folder`` into a newly created
    "Unrecognized files" subfolder.

    :param connection: connection used to obtain the ``genestack/upload`` application
    :param dict accession_file_map: raw file accession -> file name
        (the value is only used for printing; byte strings are decoded as UTF-8)
    :param new_folder: folder that currently contains the raw files
    """
    # Files Recognition
    fu = FilesUtil(connection)

    application = connection.application('genestack/upload')
    # Send a real list: on Python 3 ``dict.keys()`` is a view object, which is
    # presumably not serializable when the call is sent to the server.
    recognised_files = application.invoke('recognizeGroupsByAccession', list(accession_file_map))

    # Collect the accession of every source file that ended up in some group.
    recognized_accessions = {
        info['accession']
        for recognised in recognised_files
        for sources in recognised['sourceFileInfos'].values()
        for info in sources
    }

    created_files = application.invoke('createFiles', recognised_files, [], None)
    # groupby() only merges adjacent items, so sort by the same key first.
    groups = sorted(created_files['files'], key=itemgetter('kind'))
    for name, group in groupby(groups, key=itemgetter('kind')):
        print(name)
        # maybe sort by filename before printing a group?
        for f in group:
            print('\t%s / %s' % (f['accession'], f['name']))

    unrecognized_file_infos = set(accession_file_map) - recognized_accessions

    if unrecognized_file_infos:
        print('Unrecognized Raw Files')
        for accession in unrecognized_file_infos:
            file_name = accession_file_map[accession]
            # Only byte strings need decoding; a Python 3 ``str`` has no ``decode``.
            if isinstance(file_name, bytes):
                file_name = file_name.decode('utf-8')
            print('\t%s / %s' % (accession, file_name))
        # move unrecognized files to new folder
        unrecognized_folder = fu.create_folder("Unrecognized files", parent=new_folder)
        for accession in unrecognized_file_infos:
            fu.link_file(accession, unrecognized_folder)
            fu.unlink_file(accession, new_folder)
        print('Unrecognized files moved to %s / %s' % (unrecognized_folder, 'Unrecognized files'))
def upload_files(connection, files, folder_name, folder_accession):
    """
    Load raw files and collect them in a single folder.

    If ``folder_accession`` is empty, a folder is created inside the special
    UPLOADED folder, named ``folder_name`` or (when that is empty as well)
    a timestamp-based default.  Otherwise the given folder is used and its
    name is looked up.

    :param genestack_client.Connection connection:
    :param list[str] files:
    :param str folder_name:
    :param str folder_accession:
    :return: tuple ``(folder accession, folder name, accession -> entry of files)``
    """
    raw_importer = DataImporter(connection)
    files_util = FilesUtil(connection)
    uploaded_folder = files_util.get_special_folder(SpecialFolders.UPLOADED)

    if folder_accession:
        # An existing folder was requested: report its actual name.
        folder_name = files_util.get_infos([folder_accession])[0]['name']
    else:
        if not folder_name:
            folder_name = datetime.now().strftime('Upload %d.%m.%y %H:%M:%S')
        folder_accession = files_util.create_folder(
            folder_name,
            parent=uploaded_folder,
            description='Files uploaded by genestack-uploader')

    loaded = {}
    for source in files:
        raw_accession = raw_importer.load_raw(source)
        # Link into the target folder, then drop the link from UPLOADED.
        files_util.link_file(raw_accession, folder_accession)
        files_util.unlink_file(raw_accession, uploaded_folder)
        loaded[raw_accession] = source
    return folder_accession, folder_name, loaded
Beispiel #3
0
def test_metainfo_io(conn):
    """
    Check that metainfo values survive a round trip through a report file.

    A metainfo holding several kinds of values is attached to a new report
    file, read back with ``collect_metainfos`` and compared value by value.
    The report file is unlinked from the CREATED folder afterwards.
    """
    data_importer = DataImporter(conn)
    files_util = FilesUtil(conn)

    created_folder = files_util.get_special_folder(SpecialFolders.CREATED)

    # Metainfo that is sent along with the report file.
    sent = Metainfo()
    sent.add_boolean("a", True)
    sent.add_file_reference("b", created_folder)
    sent.add_date_time("c", "2015-12-13")
    sent.add_integer("d", 239)
    # Key "e" deliberately receives two values.
    sent.add_decimal("e", 238.583)
    sent.add_decimal("e", -13.4)
    sent.add_string("f", "hello")
    sent.add_memory_size("g", 2847633)
    sent.add_person("i", "Rosalind Franklin", "+1-202-555-0123",
                    "*****@*****.**")
    sent.add_publication("j",
                         "My Publication",
                         "Myself",
                         "Journal of Me",
                         "23/12/2014",
                         pages="12-23")
    sent.add_value(Metainfo.NAME, StringValue("Test report file"))

    expected_person = {
        'name': 'Rosalind Franklin',
        'phone': '+1-202-555-0123',
        'email': '*****@*****.**'
    }
    expected_publication = {
        'title': 'My Publication',
        'authors': 'Myself',
        'journalName': 'Journal of Me',
        'issueDate': '23/12/2014',
        'pages': '12-23',
        'issueNumber': None,
        'identifiers': {}
    }

    report_accession = None
    try:
        report_accession = data_importer.create_report_file(
            metainfo=sent, urls=[TEST_URL], parent=created_folder)
        received = next(iter(files_util.collect_metainfos([report_accession])))
        assert received.get('a')[0].get_boolean()
        assert isinstance(received.get('b')[0].get_accession(), str)
        assert received.get('c')[0].get_date() == _strptime_local(
            '2015-12-13', '%Y-%m-%d')
        assert received.get('d')[0].get_int() == 239
        assert received.get('e')[0].get_decimal() == 238.583
        assert received.get('e')[1].get_decimal() == -13.4
        assert received.get('f')[0].get_string() == "hello"
        assert received.get('g')[0].get_int() == 2847633
        assert received.get('i')[0].get_person() == expected_person
        assert received.get('j')[0].get_publication() == expected_publication
        assert received.get(
            Metainfo.NAME)[0].get_string() == "Test report file"
        assert received.get(BioMetaKeys.DATA_LINK)[0].get_url() == TEST_URL
    finally:
        # Clean up only when the report file was actually created.
        if report_accession is not None:
            files_util.unlink_file(report_accession, created_folder)
Beispiel #4
0
def test_en_isoforms(conn, keep_files):
    """
    Create a file with ``ExpressionNavigatorforIsoforms`` from ISOFORM_GROUPS.

    The created file is unlinked from the CREATED folder afterwards unless
    ``keep_files`` is true.
    """
    files_util = FilesUtil(conn)
    navigator = ExpressionNavigatorforIsoforms(conn)
    created_file = None
    try:
        groups = []
        for accessions in ISOFORM_GROUPS:
            groups.append({'accessions': accessions})
        created_file = navigator.create_file(groups, multi_mapping_corr=True)
    finally:
        # Nothing to clean up if creation failed or the caller keeps the files.
        if created_file is not None and not keep_files:
            created_folder = files_util.get_special_folder(SpecialFolders.CREATED)
            files_util.unlink_file(created_file, created_folder)
Beispiel #5
0
def test_en_rna_seq(conn, keep_files):
    """
    Create a file with ``ExpressionNavigatorforGenes`` from RNA_SEQ_GROUPS.

    The created file is unlinked from the CREATED folder afterwards unless
    ``keep_files`` is true.
    """
    files_util = FilesUtil(conn)
    navigator = ExpressionNavigatorforGenes(conn)
    created_file = None
    try:
        groups = []
        for accessions in RNA_SEQ_GROUPS:
            groups.append({'accessions': accessions})
        created_file = navigator.create_file(
            groups, r_package=navigator.PKG_DESEQ, organism="new organism")
    finally:
        # Nothing to clean up if creation failed or the caller keeps the files.
        if created_file is not None and not keep_files:
            created_folder = files_util.get_special_folder(SpecialFolders.CREATED)
            files_util.unlink_file(created_file, created_folder)
def test_en_isoforms(conn, keep_files):
    """
    Build one group per entry of ISOFORM_GROUPS and create a file with
    ``ExpressionNavigatorforIsoforms``.

    Unless ``keep_files`` is true, the created file is unlinked from the
    CREATED folder at the end, even if the test body raised.
    """
    util = FilesUtil(conn)
    isoform_navigator = ExpressionNavigatorforIsoforms(conn)
    result_file = None
    try:
        group_specs = [dict(accessions=accs) for accs in ISOFORM_GROUPS]
        result_file = isoform_navigator.create_file(group_specs, multi_mapping_corr=True)
    finally:
        if not keep_files:
            # result_file stays None when create_file did not complete.
            if result_file is not None:
                util.unlink_file(result_file, util.get_special_folder(SpecialFolders.CREATED))
def test_en_rna_seq(conn, keep_files):
    """
    Build one group per entry of RNA_SEQ_GROUPS and create a file with
    ``ExpressionNavigatorforGenes``.

    Unless ``keep_files`` is true, the created file is unlinked from the
    CREATED folder at the end, even if the test body raised.
    """
    util = FilesUtil(conn)
    gene_navigator = ExpressionNavigatorforGenes(conn)
    result_file = None
    try:
        group_specs = [dict(accessions=accs) for accs in RNA_SEQ_GROUPS]
        result_file = gene_navigator.create_file(
            group_specs, r_package=gene_navigator.PKG_DESEQ, organism="new organism")
    finally:
        if not keep_files:
            # result_file stays None when create_file did not complete.
            if result_file is not None:
                util.unlink_file(result_file, util.get_special_folder(SpecialFolders.CREATED))
def upload_files(connection, files, folder_name):
    """
    Load raw files and collect them in a newly created folder.

    The folder is created inside the special UPLOADED folder and is named
    ``folder_name`` or, when that is empty, a timestamp-based default.

    :param connection: connection passed to ``DataImporter`` and ``FilesUtil``
    :param files: iterable of items accepted by ``DataImporter.load_raw``
    :param folder_name: name for the new folder; may be empty
    :return: tuple ``(new folder, folder name, accession -> entry of files)``
    """
    raw_importer = DataImporter(connection)
    files_util = FilesUtil(connection)
    uploaded_folder = files_util.get_special_folder(SpecialFolders.UPLOADED)

    if not folder_name:
        folder_name = datetime.now().strftime('Upload %d.%m.%y %H:%M:%S')
    target_folder = files_util.create_folder(
        folder_name,
        parent=uploaded_folder,
        description='Files uploaded by genestack-uploader')

    loaded = {}
    for source in files:
        raw_accession = raw_importer.load_raw(source)
        # Link into the new folder, then drop the link from UPLOADED.
        files_util.link_file(raw_accession, target_folder)
        files_util.unlink_file(raw_accession, uploaded_folder)
        loaded[raw_accession] = source
    return target_folder, folder_name, loaded
Beispiel #9
0
def test_en_microarrays(conn, keep_files):
    """
    Create a normalization file for all MICROARRAY_GROUPS accessions and then
    a file with ``ExpressionNavigatorforMicroarrays`` based on it.

    The first group is flagged as control.  Unless ``keep_files`` is true,
    every file that was created is unlinked from the CREATED folder.
    """
    files_util = FilesUtil(conn)
    navigator = ExpressionNavigatorforMicroarrays(conn)
    normalization = AffymetrixMicroarraysNormalizationApplication(conn)
    navigator_file = None
    normalized_file = None
    try:
        groups = []
        for accessions in MICROARRAY_GROUPS:
            groups.append({'accessions': accessions})
        groups[0]['is_control'] = True

        # Normalization takes the accessions of all groups as one flat list.
        all_accessions = []
        for accessions in MICROARRAY_GROUPS:
            all_accessions.extend(accessions)
        normalized_file = normalization.create_file(all_accessions)
        navigator_file = navigator.create_file(groups, normalized_file, RAT_AFFY_ANNOTATION)
    finally:
        if not keep_files:
            created_folder = files_util.get_special_folder(SpecialFolders.CREATED)
            for accession in (normalized_file, navigator_file):
                # Skip files whose creation never completed.
                if accession is None:
                    continue
                files_util.unlink_file(accession, created_folder)
def test_en_microarrays(conn, keep_files):
    """
    Normalize every accession listed in MICROARRAY_GROUPS, then create a file
    with ``ExpressionNavigatorforMicroarrays`` from the normalized file.

    Group 0 is marked with ``is_control``.  Created files are unlinked from
    the CREATED folder at the end unless ``keep_files`` is true.
    """
    util = FilesUtil(conn)
    microarray_navigator = ExpressionNavigatorforMicroarrays(conn)
    normalizer = AffymetrixMicroarraysNormalizationApplication(conn)
    result_file = None
    normalization_file = None
    try:
        group_specs = [dict(accessions=accs) for accs in MICROARRAY_GROUPS]
        group_specs[0]['is_control'] = True
        flat_accessions = []
        for accs in MICROARRAY_GROUPS:
            for accession in accs:
                flat_accessions.append(accession)
        normalization_file = normalizer.create_file(flat_accessions)
        result_file = microarray_navigator.create_file(
            group_specs, normalization_file, RAT_AFFY_ANNOTATION)
    finally:
        if not keep_files:
            created_folder = util.get_special_folder(SpecialFolders.CREATED)
            # Either file may still be None if its creation did not complete.
            for leftover in (normalization_file, result_file):
                if leftover is not None:
                    util.unlink_file(leftover, created_folder)
def test_metainfo_io(conn):
    """
    Send a metainfo with a report file and verify what is read back.

    Values of several kinds are added to a ``Metainfo``, a report file is
    created with it, and the metainfo returned by ``collect_metainfos`` is
    compared value by value.  The report file is unlinked from the CREATED
    folder at the end.
    """
    importer = DataImporter(conn)
    util = FilesUtil(conn)

    created_folder = util.get_special_folder(SpecialFolders.CREATED)

    outgoing = Metainfo()
    outgoing.add_boolean("a", True)
    outgoing.add_file_reference("b", created_folder)
    outgoing.add_date_time("c", "2015-12-13")
    outgoing.add_integer("d", 239)
    # Two values are stored under the same key "e".
    outgoing.add_decimal("e", 238.583)
    outgoing.add_decimal("e", -13.4)
    outgoing.add_string("f", "hello")
    outgoing.add_memory_size("g", 2847633)
    outgoing.add_person("i", "Rosalind Franklin", "+1-202-555-0123", "*****@*****.**")
    outgoing.add_publication("j", "My Publication", "Myself", "Journal of Me", "23/12/2014", pages="12-23")
    outgoing.add_value(Metainfo.NAME, StringValue("Test report file"))

    person = {'name': 'Rosalind Franklin', 'phone': '+1-202-555-0123', 'email': '*****@*****.**'}
    publication = {'title': 'My Publication', 'authors': 'Myself',
                   'journalName': 'Journal of Me', 'issueDate': '23/12/2014',
                   'pages': '12-23', 'issueNumber': None, 'identifiers': {}}

    report = None
    try:
        report = importer.create_report_file(metainfo=outgoing, urls=[TEST_URL], parent=created_folder)
        incoming = next(iter(util.collect_metainfos([report])))
        assert incoming.get('a')[0].get_boolean()
        assert isinstance(incoming.get('b')[0].get_accession(), str)
        assert incoming.get('c')[0].get_date() == _strptime_local('2015-12-13', '%Y-%m-%d')
        assert incoming.get('d')[0].get_int() == 239
        assert incoming.get('e')[0].get_decimal() == 238.583
        assert incoming.get('e')[1].get_decimal() == -13.4
        assert incoming.get('f')[0].get_string() == "hello"
        assert incoming.get('g')[0].get_int() == 2847633
        assert incoming.get('i')[0].get_person() == person
        assert incoming.get('j')[0].get_publication() == publication
        assert incoming.get(Metainfo.NAME)[0].get_string() == "Test report file"
        assert incoming.get(BioMetaKeys.DATA_LINK)[0].get_url() == TEST_URL
    finally:
        # report is still None when create_report_file did not complete.
        if report is not None:
            util.unlink_file(report, created_folder)
def recognize_files(connection, accession_file_map, new_folder):
    """
    Recognize raw files, create files from them and report the outcome.

    Invokes ``recognizeGroupsByAccession`` and then ``createFiles`` on the
    ``genestack/upload`` application, prints the created files grouped by
    their ``kind``, and finally moves every raw file that did not appear in
    any recognized group from ``new_folder`` into a newly created
    "Unrecognized files" subfolder.

    :param connection: connection used to obtain the ``genestack/upload``
        application
    :param dict accession_file_map: raw file accession -> file name (the
        value is only used for printing; byte strings are decoded as UTF-8)
    :param new_folder: folder that currently contains the raw files
    """
    # Files Recognition
    fu = FilesUtil(connection)

    application = connection.application('genestack/upload')
    # Send a real list: on Python 3 ``dict.keys()`` is a view object, which is
    # presumably not serializable when the call is sent to the server.
    recognised_files = application.invoke('recognizeGroupsByAccession',
                                          list(accession_file_map))

    # Collect the accession of every source file that ended up in some group.
    recognized_accessions = {
        info['accession']
        for recognised in recognised_files
        for sources in recognised['sourceFileInfos'].values()
        for info in sources
    }

    created_files = application.invoke('createFiles', recognised_files, [],
                                       None)
    # groupby() only merges adjacent items, so sort by the same key first.
    groups = sorted(created_files['files'], key=itemgetter('kind'))
    for name, group in groupby(groups, key=itemgetter('kind')):
        print(name)
        # maybe sort by filename before printing a group?
        for f in group:
            print('\t%s / %s' % (f['accession'], f['name']))

    unrecognized_file_infos = set(accession_file_map) - recognized_accessions

    if unrecognized_file_infos:
        print('Unrecognized Raw Files')
        for accession in unrecognized_file_infos:
            file_name = accession_file_map[accession]
            # Only byte strings need decoding; a Python 3 ``str`` has no
            # ``decode`` method.
            if isinstance(file_name, bytes):
                file_name = file_name.decode('utf-8')
            print('\t%s / %s' % (accession, file_name))
        # move unrecognized files to new folder
        unrecognized_folder = fu.create_folder("Unrecognized files",
                                               parent=new_folder)
        for accession in unrecognized_file_infos:
            fu.link_file(accession, unrecognized_folder)
            fu.unlink_file(accession, new_folder)
        print('Unrecognized files moved to %s / %s' %
              (unrecognized_folder, 'Unrecognized files'))
Beispiel #13
0
# Group the files by the application that produced them: each file is linked
# into a per-application subfolder of a new "Organized files" folder.
# NOTE: this fragment uses ``files``, ``files_count``, ``files_util``,
# ``source_folder`` and ``move_files``, which are not defined here and must be
# set up earlier in the script.
# print() is called with a single pre-formatted string so that the script
# runs on both Python 2 and Python 3.
print("Found %d files to organise. Retrieving infos..." % files_count)
infos = files_util.get_complete_infos(files)

output_folder = files_util.create_folder("Organized files",
                                         parent=source_folder)
grouping_folders = {}

for i, entry in enumerate(infos, 1):
    accession = entry['accession']
    print("Processing file %d of %d (%s)..." % (i, files_count, accession))

    # use either application name, application ID or "Unknown application" (in this order of preference);
    # a missing or empty value at any step falls through to the next one
    app_entry = entry.get('application') or {}
    application = (app_entry.get('name') or app_entry.get('id')
                   or "Unknown application")

    # if there is a folder for this group, we add the file to it ;
    # otherwise, we create one, add it to our dictionary of folders and add the file to it
    if application not in grouping_folders:
        new_folder = files_util.create_folder("Files for %s" % application,
                                              parent=output_folder)
        grouping_folders[application] = new_folder
    files_util.link_file(accession, grouping_folders[application])
    if move_files:
        # moving = linking into the new folder and unlinking from the source
        files_util.unlink_file(accession, source_folder)

print("All done! Your files can be found inside the folder with accession %s" % output_folder)
# Group the children of ``source_folder`` by the application that produced
# them: each file is linked into a per-application subfolder of a new
# "Organized files" folder.
# NOTE: ``files_util``, ``source_folder`` and ``move_files`` are not defined
# here and must be set up earlier in the script.
# print() is called with a single pre-formatted string so that the script
# runs on both Python 2 and Python 3.
print("Collecting files...")
files = files_util.get_file_children(source_folder)
files_count = len(files)
print("Found %d files to organise. Retrieving infos..." % files_count)
infos = files_util.get_complete_infos(files)

output_folder = files_util.create_folder("Organized files", parent=source_folder)
grouping_folders = {}

for i, entry in enumerate(infos, 1):
    accession = entry['accession']
    print("Processing file %d of %d (%s)..." % (i, files_count, accession))

    # use either application name, application ID or "Unknown application" (in this order of preference);
    # a missing or empty value at any step falls through to the next one
    app_entry = entry.get('application') or {}
    application = app_entry.get('name') or app_entry.get('id') or "Unknown application"

    # if there is a folder for this group, we add the file to it ;
    # otherwise, we create one, add it to our dictionary of folders and add the file to it
    if application not in grouping_folders:
        new_folder = files_util.create_folder("Files for %s" % application, parent=output_folder)
        grouping_folders[application] = new_folder
    files_util.link_file(accession, grouping_folders[application])
    if move_files:
        # moving = linking into the new folder and unlinking from the source
        files_util.unlink_file(accession, source_folder)

print("All done! Your files can be found inside the folder with accession %s" % output_folder)