Example #1
0
def main(workspace, accessions_files):
    """Write ``files.csv`` into *workspace*, one row per extracted file.

    The output is tab-separated despite the ``.csv`` extension: a header
    line followed by one line per ``(accession_id, item)`` pair yielded by
    ``extract_files`` for every accession file.

    Args:
        workspace: Directory in which ``files.csv`` is created (an existing
            file is overwritten).
        accessions_files: Iterable of paths to accession files. Each one is
            opened for reading and handed to ``parse_accession_file``.

    Raises:
        KeyError: If an item has no ``'file_location'`` entry; it is looked
            up with ``item[...]`` before the row is built.
    """
    # Columns copied straight from each item; a missing key is written as ''.
    item_fields = [
        "species", "cell", "label", "readType", "qualities", "file_location",
        "dataType", "rnaExtract", "localization", "lab", "view", "type",
        "replicate",
    ]
    headers = (["project_id", "accession_id"] + item_fields +
               ["file_not_found", "file_size", "configuration_file"])
    template = '\t'.join(['%s'] * len(headers)) + '\n'

    # Context managers guarantee both handles are closed even when parsing
    # or a lookup raises part-way through.
    with open(os.path.join(workspace, "files.csv"), "w") as output_file:
        output_file.write('\t'.join(headers) + '\n')

        for input_file in accessions_files:
            # The project id is the name of the directory holding the file.
            project_id = os.path.basename(os.path.dirname(input_file))

            with open(input_file, 'r') as accession_file:
                accessions = parse_accession_file(accession_file)

                # Kept inside the ``with`` so the accession file stays open
                # while its contents are consumed, as it did originally.
                for accession_id, item in extract_files(accessions):
                    file_info = utils.file_info(item['file_location'])
                    row = [project_id, accession_id]
                    row.extend(item.get(field, '') for field in item_fields)
                    row.extend([
                        file_info['file_not_found'],
                        file_info['file_size'],
                        input_file,  # the "configuration_file" column
                    ])
                    output_file.write(template % tuple(row))
Example #2
0
def main(workspace, accessions_files):
    """Dump the files described by the accession files to ``files.csv``.

    A tab-separated table (header line first) is written to
    ``<workspace>/files.csv``. Every ``(accession_id, item)`` pair produced
    by ``extract_files`` for an accession file becomes one row.

    Args:
        workspace: Directory that receives ``files.csv``; an existing file
            of that name is overwritten.
        accessions_files: Iterable of accession file paths. Each is opened
            in read mode and passed to ``parse_accession_file``.

    Raises:
        KeyError: If an item lacks ``'file_location'``, which is accessed by
            subscript before the row is assembled.
    """
    headers = [
        "project_id",
        "accession_id",
        "species",
        "cell",
        "label",
        "readType",
        "qualities",
        "file_location",
        "dataType",
        "rnaExtract",
        "localization",
        "lab",
        "view",
        "type",
        "replicate",
        "file_not_found",
        "file_size",
        "configuration_file",
    ]
    # Everything between the two leading id columns and the three trailing
    # computed columns is read from the item itself, defaulting to ''.
    item_columns = headers[2:-3]
    template = '\t'.join(['%s'] * len(headers)) + '\n'

    output_path = os.path.join(workspace, "files.csv")
    # ``with`` closes the handles on every exit path, including exceptions
    # raised while parsing or while looking up file information.
    with open(output_path, "w") as output_file:
        output_file.write('\t'.join(headers) + '\n')

        for input_file in accessions_files:
            with open(input_file, 'r') as accession_file:
                accessions = parse_accession_file(accession_file)

                # Name of the directory that directly contains input_file.
                project_id = os.path.basename(os.path.dirname(input_file))

                # The accession file is kept open while the extracted
                # entries are consumed, matching the original ordering.
                for accession_id, item in extract_files(accessions):
                    file_info = utils.file_info(item['file_location'])
                    values = (
                        [project_id, accession_id] +
                        [item.get(column, '') for column in item_columns] +
                        [file_info['file_not_found'],
                         file_info['file_size'],
                         input_file]  # reported as "configuration_file"
                    )
                    output_file.write(template % tuple(values))
Example #3
0
def main(workspace, annotations_file):
    """Write ``annotations.csv`` into *workspace* from an INI-style file.

    Each section of *annotations_file* becomes one tab-separated row holding
    the columns listed in ``headers``; a header row is written first. The
    section's options are merged with the mapping returned by
    ``utils.file_info`` for the section's ``file_location``.

    Args:
        workspace: Directory in which ``annotations.csv`` is created (an
            existing file is overwritten).
        annotations_file: Open, readable file object with the annotation
            sections. It is closed by this function, including on error.

    Raises:
        KeyError: If a section has no ``file_location`` option, or if a
            header column is absent from the merged section data.
    """
    headers = ("species", "version", "url", "file_location", "file_not_found",
               "file_size")
    template = '\t'.join(['%s'] * len(headers)) + '\n'

    try:
        # ``with`` closes the output file even if a section is malformed.
        with open(os.path.join(workspace, "annotations.csv"), "w") as output_file:
            output_file.write(template % headers)

            parser = ConfigParser.RawConfigParser()
            # Identity transform: keep option names exactly as written
            # rather than letting the parser normalise them.
            parser.optionxform = lambda s: s
            parser.readfp(annotations_file)

            for section in parser.sections():
                data = dict(parser.items(section))
                # file_info results override same-named section options.
                data.update(utils.file_info(data['file_location']))
                output_file.write(template % tuple(data[h] for h in headers))
    finally:
        # The function owns the handle it was given; always release it.
        annotations_file.close()
Example #4
0
def main(workspace, genomes_file):
    """Write ``genomes.csv`` into *workspace* from an INI-style file.

    A header row is written first, then one tab-separated row per section of
    *genomes_file*. Row values come from the section's options merged with
    the mapping returned by ``utils.file_info`` for its ``file_location``.

    Args:
        workspace: Directory in which ``genomes.csv`` is created (an
            existing file is overwritten).
        genomes_file: Open, readable file object with the genome sections.
            It is closed by this function, including on error.

    Raises:
        KeyError: If a section has no ``file_location`` option, or if a
            header column is absent from the merged section data.
    """
    headers = ("species",
               "version",
               "url",
               "file_location",
               "file_not_found",
               "file_size")
    template = '\t'.join(['%s'] * len(headers)) + '\n'
    output_path = os.path.join(workspace, "genomes.csv")

    try:
        # The context manager closes the output file on any exit path.
        with open(output_path, "w") as output_file:
            output_file.write(template % headers)

            parser = ConfigParser.RawConfigParser()
            # Identity transform: option names are kept exactly as written
            # instead of being normalised by the parser.
            parser.optionxform = lambda s: s
            parser.readfp(genomes_file)

            for section in parser.sections():
                data = dict(parser.items(section))
                # Values from file_info win over same-named section options.
                data.update(utils.file_info(data['file_location']))
                output_file.write(template % tuple(data[h] for h in headers))
    finally:
        # Release the caller-supplied handle whether or not writing succeeded.
        genomes_file.close()