parser.add_argument(
        'folder',
        help='Accession of the Genestack folder containing the files')

    # Read the command-line arguments; the parser itself is built before this
    # excerpt.
    args = parser.parse_args()
    csv_input = args.csv_file
    local_key = args.local_key

    print('Connecting to Genestack...')

    # get connection and application handlers
    connection = get_connection(args)
    files_util = FilesUtil(connection)

    print('Collecting files...')
    files = files_util.get_file_children(args.folder)
    print('Found %d files. Collecting metadata...' % len(files))
    infos = files_util.get_infos(files)

    # Map each file name to its accession. Files sharing a name collapse to a
    # single entry (the last one returned wins).
    identifier_map = {info['name']: info['accession'] for info in infos}

    # parse the CSV file
    with open(csv_input, 'r') as the_file:
        reader = csv.DictReader(the_file, delimiter=",")
        field_names = reader.fieldnames

        # `fieldnames` is None when the CSV has no header row (e.g. an empty
        # file); treat that like a missing key instead of letting the `in`
        # test fail with a TypeError.
        if not field_names or local_key not in field_names:
            raise GenestackException(
                "Error: the local key %s is not present in the supplied CSV file"
                % local_key)
# Example #2
0
    '--move-files',
    action='store_true',
    help=
    'If present, the original files will be unlinked from the source folder')
# Read the command-line arguments; the parser is configured before this
# excerpt.
args = parser.parse_args()
source_folder = args.folder
move_files = args.move_files

# NOTE: print is called with a single parenthesised argument so the script
# produces the same output on Python 2 and also parses on Python 3.
print("Connecting to Genestack...")

# get connection and application handlers
connection = get_connection(args)
files_util = FilesUtil(connection)

print("Collecting files...")
files = files_util.get_file_children(source_folder)
files_count = len(files)
print("Found %d files to organise. Retrieving infos..." % files_count)
infos = files_util.get_complete_infos(files)

# The output folder is created inside the source folder itself.
output_folder = files_util.create_folder("Organized files",
                                         parent=source_folder)
# Starts empty; it is not populated within this excerpt.
grouping_folders = {}

for i, entry in enumerate(infos, 1):
    accession = entry['accession']
    print "Processing file %d of %d (%s)..." % (i, files_count, accession)

    # use either application name, application ID or "Unknown application" (in this order of preference)
    app_entry = entry.get('application')
    if app_entry:
# Example #3
0
                                              parent=created_files_folder)

    # create application wrappers and batch files creators
    bowtie_app = BowtieApplication(connection)
    mapped_qc_app = AlignedReadsQC(connection)
    variant_calling_app = VariationCaller2Application(connection)

    # Each creator is given the project folder, a label for its output and,
    # where needed, extra arguments for the application.
    bowtie_creator = BowtieBatchFilesCreator(bowtie_app,
                                             project_folder,
                                             "Mapped Reads",
                                             ref_genome=args.ref_genome)
    mapped_qc_creator = BatchFilesCreator(mapped_qc_app, project_folder,
                                          "Mapped Reads QC")
    vc_creator = BatchFilesCreator(variant_calling_app,
                                   project_folder,
                                   "Variants",
                                   custom_args=VC_ARGUMENTS_NO_INDELS)

    # collect files
    # NOTE: print is called with a single parenthesised argument so the output
    # is unchanged on Python 2 and the code also parses on Python 3.
    print("Collecting raw reads...")
    raw_reads = files_util.get_file_children(args.raw_reads_folder)
    files_count = len(raw_reads)
    print("Found %d files to process" % files_count)

    # Create pipeline files: the mapped reads feed both the QC step and the
    # variant-calling step.
    mapped_reads = bowtie_creator.create_files(raw_reads)
    mapped_reads_qcs = mapped_qc_creator.create_files(mapped_reads)
    vc_creator.create_files(mapped_reads)

    print("All done! Your files are in the folder %s" % project_folder)
    # Positional arguments; `parser` is created before this excerpt.
    parser.add_argument('csv_file', help='Path to the local comma-delimited CSV file containing the data')
    parser.add_argument('local_key', help='Name of the local key to match CSV records and Genestack files names')
    parser.add_argument('folder', help='Accession of the Genestack folder containing the files')

    args = parser.parse_args()
    csv_input = args.csv_file
    local_key = args.local_key

    # NOTE: print is called with a single parenthesised argument so the output
    # is unchanged on Python 2 and the code also parses on Python 3.
    print("Connecting to Genestack...")

    # get connection and application handlers
    connection = get_connection(args)
    files_util = FilesUtil(connection)

    print("Collecting files...")
    files = files_util.get_file_children(args.folder)
    print("Found %d files. Collecting metadata..." % len(files))
    infos = files_util.get_infos(files)

    # Map each file name to its accession. Files sharing a name collapse to a
    # single entry (the last one returned wins).
    identifier_map = {info['name']: info['accession'] for info in infos}

    # parse the CSV file
    with open(csv_input, 'r') as the_file:
        reader = csv.DictReader(the_file, delimiter=",")
        field_names = reader.fieldnames

        # `fieldnames` is None when the CSV has no header row (e.g. an empty
        # file); treat that like a missing key instead of letting the `in`
        # test fail with a TypeError.
        if not field_names or local_key not in field_names:
            raise GenestackException("Error: the local key %s is not present in the supplied CSV file" % local_key)

        for file_data in reader:
            # find the corresponding file
# Example #5
0
    print('Connecting to Genestack...')

    # get connection and create output folder
    # (`args` and `project_name` are defined before this excerpt)
    connection = get_connection(args)
    files_util = FilesUtil(connection)
    created_files_folder = files_util.get_special_folder(SpecialFolders.CREATED)
    project_folder = files_util.create_folder(project_name, parent=created_files_folder)

    # create application wrappers and batch files creators
    bowtie_app = BowtieApplication(connection)
    mapped_qc_app = AlignedReadsQC(connection)
    variant_calling_app = VariationCaller2Application(connection)

    # Each creator is given the project folder, a label for its output and,
    # where needed, extra arguments for the application.
    bowtie_creator = BowtieBatchFilesCreator(bowtie_app, project_folder, "Mapped Reads", ref_genome=args.ref_genome)
    mapped_qc_creator = BatchFilesCreator(mapped_qc_app, project_folder, "Mapped Reads QC")
    vc_creator = BatchFilesCreator(variant_calling_app, project_folder, "Variants", custom_args=VC_ARGUMENTS_NO_INDELS)

    # collect files
    print('Collecting raw reads...')
    raw_reads = files_util.get_file_children(args.raw_reads_folder)
    files_count = len(raw_reads)
    print('Found %d files to process' % files_count)

    # Create pipeline files: the mapped reads feed both the QC step and the
    # variant-calling step.
    mapped_reads = bowtie_creator.create_files(raw_reads)
    # NOTE(review): mapped_reads_qcs is not read again in the visible code.
    mapped_reads_qcs = mapped_qc_creator.create_files(mapped_reads)
    vc_creator.create_files(mapped_reads)

    print('All done! Your files are in the folder %s' % project_folder)
# Build the command-line interface on top of the shared connection parser.
parser = make_connection_parser()
parser.add_argument('folder', help='Accession of the Genestack folder storing the files to group by application')
parser.add_argument('--move-files', action='store_true',
                    help='If present, the original files will be unlinked from the source folder')
args = parser.parse_args()
source_folder = args.folder
move_files = args.move_files

# NOTE: print is called with a single parenthesised argument so the script
# produces the same output on Python 2 and also parses on Python 3.
print("Connecting to Genestack...")

# get connection and application handlers
connection = get_connection(args)
files_util = FilesUtil(connection)

print("Collecting files...")
files = files_util.get_file_children(source_folder)
files_count = len(files)
print("Found %d files to organise. Retrieving infos..." % files_count)
infos = files_util.get_complete_infos(files)

# The output folder is created inside the source folder itself.
output_folder = files_util.create_folder("Organized files", parent=source_folder)
# Starts empty; it is not populated within this excerpt.
grouping_folders = {}

# Progress is reported 1-based, hence enumerate(..., 1).
for i, entry in enumerate(infos, 1):
    accession = entry['accession']
    print("Processing file %d of %d (%s)..." % (i, files_count, accession))

    # use either application name, application ID or "Unknown application" (in this order of preference)
    app_entry = entry.get('application')
    if app_entry:
        application = app_entry.get('name') or app_entry.get('id', "Unknown application")