def upload_files(connection, files, folder_name, folder_accession):
    """
    Load raw files and collect them in a single destination folder.

    If ``folder_accession`` is empty, a new folder is created inside the
    special ``UPLOADED`` folder. Its name is ``folder_name`` or, when that is
    empty as well, a name built from the current local time. Otherwise the
    folder with the given accession is used and its name is looked up via
    ``FilesUtil.get_infos``.

    Every entry of ``files`` is passed to ``DataImporter.load_raw``; the
    resulting file is linked into the destination folder and unlinked from
    the special ``UPLOADED`` folder.

    :param genestack_client.Connection connection: connection used to build
        the ``DataImporter`` and ``FilesUtil`` helpers
    :param list[str] files: files to load, in the order they are processed
    :param str folder_name: name for a newly created folder (may be empty)
    :param str folder_accession: accession of an existing destination folder
        (may be empty)
    :return: tuple of (destination folder accession, destination folder name,
        dict mapping each loaded file accession to its entry in ``files``)
    """
    data_importer = DataImporter(connection)
    files_util = FilesUtil(connection)
    uploaded_folder = files_util.get_special_folder(SpecialFolders.UPLOADED)

    if folder_accession:
        # Existing destination: only its name has to be resolved.
        folder_name = files_util.get_infos([folder_accession])[0]['name']
    else:
        if not folder_name:
            folder_name = datetime.now().strftime('Upload %d.%m.%y %H:%M:%S')
        folder_accession = files_util.create_folder(
            folder_name,
            parent=uploaded_folder,
            description='Files uploaded by genestack-uploader')

    source_by_accession = {}
    for source in files:
        file_accession = data_importer.load_raw(source)
        # Move the file: link into the destination first, then drop it
        # from the special UPLOADED folder.
        files_util.link_file(file_accession, folder_accession)
        files_util.unlink_file(file_accession, uploaded_folder)
        source_by_accession[file_accession] = source
    return folder_accession, folder_name, source_by_accession
Beispiel #2
0
def test_metainfo_io(conn):
    """
    Check that metainfo values survive a write/read round trip.

    A ``Metainfo`` with values of several kinds (two decimals share key "e")
    is attached to a report file created in the special ``CREATED`` folder.
    The metainfo is then read back with ``FilesUtil.collect_metainfos`` and
    every value is compared with what was written. If the report file was
    created, it is unlinked from the ``CREATED`` folder afterwards, whether
    or not the assertions passed.

    :param conn: connection passed to ``DataImporter`` and ``FilesUtil``
    """
    importer = DataImporter(conn)
    files_util = FilesUtil(conn)

    created_folder = files_util.get_special_folder(SpecialFolders.CREATED)

    source_info = Metainfo()
    source_info.add_boolean("a", True)
    source_info.add_file_reference("b", created_folder)
    source_info.add_date_time("c", "2015-12-13")
    source_info.add_integer("d", 239)
    source_info.add_decimal("e", 238.583)
    source_info.add_decimal("e", -13.4)
    source_info.add_string("f", "hello")
    source_info.add_memory_size("g", 2847633)
    source_info.add_person(
        "i", "Rosalind Franklin", "+1-202-555-0123", "*****@*****.**")
    source_info.add_publication(
        "j", "My Publication", "Myself", "Journal of Me", "23/12/2014",
        pages="12-23")
    source_info.add_value(Metainfo.NAME, StringValue("Test report file"))

    # Expected structured values, as returned when read back.
    expected_person = {
        'name': 'Rosalind Franklin',
        'phone': '+1-202-555-0123',
        'email': '*****@*****.**',
    }
    expected_publication = {
        'title': 'My Publication',
        'authors': 'Myself',
        'journalName': 'Journal of Me',
        'issueDate': '23/12/2014',
        'pages': '12-23',
        'issueNumber': None,
        'identifiers': {},
    }

    report_file = None
    try:
        report_file = importer.create_report_file(
            metainfo=source_info, urls=[TEST_URL], parent=created_folder)
        loaded = next(iter(files_util.collect_metainfos([report_file])))

        assert loaded.get('a')[0].get_boolean()
        assert isinstance(loaded.get('b')[0].get_accession(), str)
        assert loaded.get('c')[0].get_date() == _strptime_local(
            '2015-12-13', '%Y-%m-%d')
        assert loaded.get('d')[0].get_int() == 239
        assert loaded.get('e')[0].get_decimal() == 238.583
        assert loaded.get('e')[1].get_decimal() == -13.4
        assert loaded.get('f')[0].get_string() == "hello"
        assert loaded.get('g')[0].get_int() == 2847633
        assert loaded.get('i')[0].get_person() == expected_person
        assert loaded.get('j')[0].get_publication() == expected_publication
        assert loaded.get(Metainfo.NAME)[0].get_string() == "Test report file"
        assert loaded.get(BioMetaKeys.DATA_LINK)[0].get_url() == TEST_URL
    finally:
        # Clean up only when the report file was actually created.
        if report_file is not None:
            files_util.unlink_file(report_file, created_folder)
Beispiel #3
0
def test_en_isoforms(conn, keep_files):
    """
    Create an isoform Expression Navigator file from ``ISOFORM_GROUPS``.

    The file is created with ``multi_mapping_corr=True``. Unless
    ``keep_files`` is truthy, a successfully created file is unlinked from
    the special ``CREATED`` folder afterwards.

    :param conn: connection passed to ``FilesUtil`` and the application
    :param keep_files: when truthy, skip the cleanup step
    """
    files_util = FilesUtil(conn)
    navigator = ExpressionNavigatorforIsoforms(conn)
    navigator_file = None
    try:
        sample_groups = []
        for accessions in ISOFORM_GROUPS:
            sample_groups.append({'accessions': accessions})
        navigator_file = navigator.create_file(
            sample_groups, multi_mapping_corr=True)
    finally:
        # Nothing to remove if creation failed or files must be kept.
        if not (keep_files or navigator_file is None):
            created_folder = files_util.get_special_folder(
                SpecialFolders.CREATED)
            files_util.unlink_file(navigator_file, created_folder)
Beispiel #4
0
def test_en_rna_seq(conn, keep_files):
    """
    Create a gene Expression Navigator file from ``RNA_SEQ_GROUPS``.

    The file is created with the application's ``PKG_DESEQ`` package and the
    organism string "new organism". Unless ``keep_files`` is truthy, a
    successfully created file is unlinked from the special ``CREATED``
    folder afterwards.

    :param conn: connection passed to ``FilesUtil`` and the application
    :param keep_files: when truthy, skip the cleanup step
    """
    files_util = FilesUtil(conn)
    navigator = ExpressionNavigatorforGenes(conn)
    navigator_file = None
    try:
        sample_groups = []
        for accessions in RNA_SEQ_GROUPS:
            sample_groups.append({'accessions': accessions})
        navigator_file = navigator.create_file(
            sample_groups,
            r_package=navigator.PKG_DESEQ,
            organism="new organism")
    finally:
        # Nothing to remove if creation failed or files must be kept.
        if not (keep_files or navigator_file is None):
            created_folder = files_util.get_special_folder(
                SpecialFolders.CREATED)
            files_util.unlink_file(navigator_file, created_folder)
def test_en_isoforms(conn, keep_files):
    """
    Build an Expression Navigator file for isoforms and remove it afterwards.

    Groups are taken from ``ISOFORM_GROUPS`` and the file is created with
    ``multi_mapping_corr=True``. The created file is unlinked from the
    special ``CREATED`` folder unless ``keep_files`` is truthy or the
    creation did not produce a file.

    :param conn: connection passed to ``FilesUtil`` and the application
    :param keep_files: when truthy, the created file is left in place
    """
    util = FilesUtil(conn)
    app = ExpressionNavigatorforIsoforms(conn)
    result = None
    try:
        group_specs = [{'accessions': members} for members in ISOFORM_GROUPS]
        result = app.create_file(group_specs, multi_mapping_corr=True)
    finally:
        should_clean = not keep_files
        if should_clean and result is not None:
            target = util.get_special_folder(SpecialFolders.CREATED)
            util.unlink_file(result, target)
def test_en_rna_seq(conn, keep_files):
    """
    Build an Expression Navigator file for genes and remove it afterwards.

    Groups are taken from ``RNA_SEQ_GROUPS``; the file is created with the
    application's ``PKG_DESEQ`` package and organism "new organism". The
    created file is unlinked from the special ``CREATED`` folder unless
    ``keep_files`` is truthy or the creation did not produce a file.

    :param conn: connection passed to ``FilesUtil`` and the application
    :param keep_files: when truthy, the created file is left in place
    """
    util = FilesUtil(conn)
    app = ExpressionNavigatorforGenes(conn)
    result = None
    try:
        group_specs = [{'accessions': members} for members in RNA_SEQ_GROUPS]
        result = app.create_file(
            group_specs, r_package=app.PKG_DESEQ, organism="new organism")
    finally:
        should_clean = not keep_files
        if should_clean and result is not None:
            target = util.get_special_folder(SpecialFolders.CREATED)
            util.unlink_file(result, target)
def upload_files(connection, files, folder_name):
    """
    Load raw files into a newly created folder.

    A folder is created inside the special ``UPLOADED`` folder, named
    ``folder_name`` or, when that is empty, a name built from the current
    local time. Every entry of ``files`` is passed to
    ``DataImporter.load_raw``; the resulting file is linked into the new
    folder and unlinked from the special ``UPLOADED`` folder.

    :param connection: connection used to build the ``DataImporter`` and
        ``FilesUtil`` helpers
    :param files: iterable of files to load, processed in order
    :param folder_name: name for the new folder (may be empty)
    :return: tuple of (new folder accession, folder name, dict mapping each
        loaded file accession to its entry in ``files``)
    """
    data_importer = DataImporter(connection)
    files_util = FilesUtil(connection)
    uploaded_folder = files_util.get_special_folder(SpecialFolders.UPLOADED)

    if not folder_name:
        folder_name = datetime.now().strftime('Upload %d.%m.%y %H:%M:%S')
    destination = files_util.create_folder(
        folder_name,
        parent=uploaded_folder,
        description='Files uploaded by genestack-uploader')

    source_by_accession = {}
    for source in files:
        file_accession = data_importer.load_raw(source)
        # Move the file: link into the destination first, then drop it
        # from the special UPLOADED folder.
        files_util.link_file(file_accession, destination)
        files_util.unlink_file(file_accession, uploaded_folder)
        source_by_accession[file_accession] = source
    return destination, folder_name, source_by_accession
Beispiel #8
0
def test_en_microarrays(conn, keep_files):
    """
    Create a microarray Expression Navigator file and clean up afterwards.

    Groups are built from ``MICROARRAY_GROUPS`` with the first group flagged
    as control. All group members are first passed to the Affymetrix
    normalization application; the resulting file, together with
    ``RAT_AFFY_ANNOTATION``, is then used to create the Expression Navigator
    file. Unless ``keep_files`` is truthy, every file that was created is
    unlinked from the special ``CREATED`` folder.

    :param conn: connection passed to ``FilesUtil`` and the applications
    :param keep_files: when truthy, skip the cleanup step
    """
    files_util = FilesUtil(conn)
    navigator = ExpressionNavigatorforMicroarrays(conn)
    normalizer = AffymetrixMicroarraysNormalizationApplication(conn)
    navigator_file = None
    normalized_file = None
    try:
        sample_groups = []
        for accessions in MICROARRAY_GROUPS:
            sample_groups.append({'accessions': accessions})
        sample_groups[0]['is_control'] = True

        # Flatten all groups into one list of inputs for normalization.
        all_members = []
        for members in MICROARRAY_GROUPS:
            all_members.extend(members)
        normalized_file = normalizer.create_file(all_members)
        navigator_file = navigator.create_file(
            sample_groups, normalized_file, RAT_AFFY_ANNOTATION)
    finally:
        if not keep_files:
            created_folder = files_util.get_special_folder(
                SpecialFolders.CREATED)
            # Either file may be missing if its creation step failed.
            leftovers = [item for item in (normalized_file, navigator_file)
                         if item is not None]
            for item in leftovers:
                files_util.unlink_file(item, created_folder)
def test_en_microarrays(conn, keep_files):
    """
    Build an Expression Navigator file for microarrays, then remove outputs.

    ``MICROARRAY_GROUPS`` supplies the groups (the first one is marked as
    control). The members of all groups are normalized with the Affymetrix
    normalization application, and the normalized file plus
    ``RAT_AFFY_ANNOTATION`` are passed on to create the Expression Navigator
    file. Created files are unlinked from the special ``CREATED`` folder
    unless ``keep_files`` is truthy.

    :param conn: connection passed to ``FilesUtil`` and the applications
    :param keep_files: when truthy, created files are left in place
    """
    util = FilesUtil(conn)
    en_app = ExpressionNavigatorforMicroarrays(conn)
    normalization_app = AffymetrixMicroarraysNormalizationApplication(conn)
    en_result = None
    norm_result = None
    try:
        group_specs = [{'accessions': members}
                       for members in MICROARRAY_GROUPS]
        group_specs[0]['is_control'] = True

        inputs = []
        for members in MICROARRAY_GROUPS:
            for member in members:
                inputs.append(member)
        norm_result = normalization_app.create_file(inputs)
        en_result = en_app.create_file(
            group_specs, norm_result, RAT_AFFY_ANNOTATION)
    finally:
        if not keep_files:
            target = util.get_special_folder(SpecialFolders.CREATED)
            # Skip outputs whose creation step never completed.
            for produced in (norm_result, en_result):
                if produced is None:
                    continue
                util.unlink_file(produced, target)
def test_metainfo_io(conn):
    """
    Write metainfo to a report file, read it back and compare the values.

    The metainfo contains values added through the various ``add_*`` methods
    (key "e" holds two decimals) plus a name. A report file carrying it is
    created in the special ``CREATED`` folder with ``TEST_URL`` as its only
    URL. The metainfo returned by ``FilesUtil.collect_metainfos`` is checked
    value by value. The report file, if created, is always unlinked from the
    ``CREATED`` folder at the end.

    :param conn: connection passed to ``DataImporter`` and ``FilesUtil``
    """
    importer = DataImporter(conn)
    util = FilesUtil(conn)

    created_folder = util.get_special_folder(SpecialFolders.CREATED)

    written = Metainfo()
    written.add_boolean("a", True)
    written.add_file_reference("b", created_folder)
    written.add_date_time("c", "2015-12-13")
    written.add_integer("d", 239)
    written.add_decimal("e", 238.583)
    written.add_decimal("e", -13.4)
    written.add_string("f", "hello")
    written.add_memory_size("g", 2847633)
    written.add_person(
        "i",
        "Rosalind Franklin",
        "+1-202-555-0123",
        "*****@*****.**")
    written.add_publication(
        "j",
        "My Publication",
        "Myself",
        "Journal of Me",
        "23/12/2014",
        pages="12-23")
    written.add_value(Metainfo.NAME, StringValue("Test report file"))

    # Structured values expected when the metainfo is read back.
    person = {
        'name': 'Rosalind Franklin',
        'phone': '+1-202-555-0123',
        'email': '*****@*****.**',
    }
    publication = {
        'title': 'My Publication',
        'authors': 'Myself',
        'journalName': 'Journal of Me',
        'issueDate': '23/12/2014',
        'pages': '12-23',
        'issueNumber': None,
        'identifiers': {},
    }

    report = None
    try:
        report = importer.create_report_file(
            metainfo=written,
            urls=[TEST_URL],
            parent=created_folder)
        read_back = next(iter(util.collect_metainfos([report])))

        assert read_back.get('a')[0].get_boolean()
        assert isinstance(read_back.get('b')[0].get_accession(), str)
        assert read_back.get('c')[0].get_date() == _strptime_local(
            '2015-12-13', '%Y-%m-%d')
        assert read_back.get('d')[0].get_int() == 239
        assert read_back.get('e')[0].get_decimal() == 238.583
        assert read_back.get('e')[1].get_decimal() == -13.4
        assert read_back.get('f')[0].get_string() == "hello"
        assert read_back.get('g')[0].get_int() == 2847633
        assert read_back.get('i')[0].get_person() == person
        assert read_back.get('j')[0].get_publication() == publication
        assert read_back.get(
            Metainfo.NAME)[0].get_string() == "Test report file"
        assert read_back.get(BioMetaKeys.DATA_LINK)[0].get_url() == TEST_URL
    finally:
        # Remove the report file only if it was actually created.
        if report is not None:
            util.unlink_file(report, created_folder)
Beispiel #11
0
        '--name',
        default="New Project",
        help='Name of the Genestack folder where to put the output files')
    parser.add_argument(
        '--ref-genome',
        help='Accession of the reference genome to use for the mapping step')

    args = parser.parse_args()
    project_name = args.name

    print "Connecting to Genestack..."

    # get connection and create output folder
    connection = get_connection(args)
    files_util = FilesUtil(connection)
    created_files_folder = files_util.get_special_folder(
        SpecialFolders.CREATED)
    project_folder = files_util.create_folder(project_name,
                                              parent=created_files_folder)

    # create application wrappers and batch files creators
    bowtie_app = BowtieApplication(connection)
    mapped_qc_app = AlignedReadsQC(connection)
    variant_calling_app = VariationCaller2Application(connection)

    bowtie_creator = BowtieBatchFilesCreator(bowtie_app,
                                             project_folder,
                                             "Mapped Reads",
                                             ref_genome=args.ref_genome)
    mapped_qc_creator = BatchFilesCreator(mapped_qc_app, project_folder,
                                          "Mapped Reads QC")
    vc_creator = BatchFilesCreator(variant_calling_app,
    parser = make_connection_parser()
    parser.add_argument('raw_reads_folder',
                        help='Genestack accession of the folder containing the raw reads files to process')
    parser.add_argument('--name', default="New Project",
                        help='Name of the Genestack folder where to put the output files')
    parser.add_argument('--ref-genome', help='Accession of the reference genome to use for the mapping step')

    args = parser.parse_args()
    project_name = args.name

    print('Connecting to Genestack...')

    # get connection and create output folder
    connection = get_connection(args)
    files_util = FilesUtil(connection)
    created_files_folder = files_util.get_special_folder(SpecialFolders.CREATED)
    project_folder = files_util.create_folder(project_name, parent=created_files_folder)

    # create application wrappers and batch files creators
    bowtie_app = BowtieApplication(connection)
    mapped_qc_app = AlignedReadsQC(connection)
    variant_calling_app = VariationCaller2Application(connection)

    bowtie_creator = BowtieBatchFilesCreator(bowtie_app, project_folder, "Mapped Reads", ref_genome=args.ref_genome)
    mapped_qc_creator = BatchFilesCreator(mapped_qc_app, project_folder, "Mapped Reads QC")
    vc_creator = BatchFilesCreator(variant_calling_app, project_folder, "Variants", custom_args=VC_ARGUMENTS_NO_INDELS)

    # collect files
    print('Collecting raw reads...')
    raw_reads = files_util.get_file_children(args.raw_reads_folder)
    files_count = len(raw_reads)