def upload_files(connection, files, folder_name, folder_accession):
    """
    Load files as raw data and gather them in one target folder.

    When ``folder_accession`` is empty, a new folder is created inside the
    special ``UPLOADED`` folder. It is named ``folder_name`` or, if that is
    empty as well, ``Upload <day>.<month>.<year> <time>`` built from the
    current local time. When ``folder_accession`` is given, that folder is
    used and ``folder_name`` is overwritten with the name reported by
    ``FilesUtil.get_infos`` for it.

    Each item of ``files`` is passed to ``DataImporter.load_raw``; the
    resulting accession is linked into the target folder and unlinked from
    the ``UPLOADED`` folder.

    :param genestack_client.Connection connection: connection handed to
        ``DataImporter`` and ``FilesUtil``
    :param list[str] files: files to load as raw data
    :param str folder_name: name for a newly created folder (may be empty)
    :param str folder_accession: accession of an existing target folder
        (may be empty)
    :return: ``(folder_accession, folder_name, accession_file_map)`` where
        the map goes from each loaded accession to the item of ``files``
        it was created from
    :rtype: tuple
    """
    raw_importer = DataImporter(connection)
    files_util = FilesUtil(connection)
    uploaded_folder = files_util.get_special_folder(SpecialFolders.UPLOADED)

    if folder_accession:
        # Existing folder: report its real name rather than the requested one.
        folder_info = files_util.get_infos([folder_accession])[0]
        folder_name = folder_info['name']
    else:
        if not folder_name:
            folder_name = datetime.now().strftime('Upload %d.%m.%y %H:%M:%S')
        folder_accession = files_util.create_folder(
            folder_name,
            parent=uploaded_folder,
            description='Files uploaded by genestack-uploader')

    loaded = {}
    for source_path in files:
        raw_accession = raw_importer.load_raw(source_path)
        # Move the raw file: attach to the target first, then detach from
        # the default upload location.
        files_util.link_file(raw_accession, folder_accession)
        files_util.unlink_file(raw_accession, uploaded_folder)
        loaded[raw_accession] = source_path

    return folder_accession, folder_name, loaded
def test_metainfo_io(conn):
    """
    Check that metainfo written with a report file is read back unchanged.

    A ``Metainfo`` holding boolean, file reference, date/time, integer,
    decimal (two values under one key), string, memory size, person,
    publication and name values is attached to a report file created in the
    special ``CREATED`` folder. The metainfo is then fetched through
    ``FilesUtil.collect_metainfos`` and every value is compared with what was
    written. The report file is unlinked from the ``CREATED`` folder at the
    end, whether or not the assertions pass.

    :param conn: connection handed to ``DataImporter`` and ``FilesUtil``
    """
    importer = DataImporter(conn)
    files_util = FilesUtil(conn)
    created_folder = files_util.get_special_folder(SpecialFolders.CREATED)

    source = Metainfo()
    source.add_boolean("a", True)
    source.add_file_reference("b", created_folder)
    source.add_date_time("c", "2015-12-13")
    source.add_integer("d", 239)
    # Two decimals under the same key: order must survive the round trip.
    source.add_decimal("e", 238.583)
    source.add_decimal("e", -13.4)
    source.add_string("f", "hello")
    source.add_memory_size("g", 2847633)
    source.add_person("i", "Rosalind Franklin", "+1-202-555-0123",
                      "*****@*****.**")
    source.add_publication("j", "My Publication", "Myself", "Journal of Me",
                           "23/12/2014", pages="12-23")
    source.add_value(Metainfo.NAME, StringValue("Test report file"))

    expected_person = {
        'name': 'Rosalind Franklin',
        'phone': '+1-202-555-0123',
        'email': '*****@*****.**'
    }
    expected_publication = {
        'title': 'My Publication',
        'authors': 'Myself',
        'journalName': 'Journal of Me',
        'issueDate': '23/12/2014',
        'pages': '12-23',
        'issueNumber': None,
        'identifiers': {}
    }

    report_accession = None
    try:
        report_accession = importer.create_report_file(
            metainfo=source, urls=[TEST_URL], parent=created_folder)
        collected = files_util.collect_metainfos([report_accession])
        stored = next(iter(collected))

        def value_at(key, index=0):
            # Fetch the index-th value stored under key.
            return stored.get(key)[index]

        assert value_at('a').get_boolean()
        assert isinstance(value_at('b').get_accession(), str)
        assert value_at('c').get_date() == _strptime_local(
            '2015-12-13', '%Y-%m-%d')
        assert value_at('d').get_int() == 239
        assert value_at('e').get_decimal() == 238.583
        assert value_at('e', 1).get_decimal() == -13.4
        assert value_at('f').get_string() == "hello"
        assert value_at('g').get_int() == 2847633
        assert value_at('i').get_person() == expected_person
        assert value_at('j').get_publication() == expected_publication
        assert value_at(Metainfo.NAME).get_string() == "Test report file"
        assert value_at(BioMetaKeys.DATA_LINK).get_url() == TEST_URL
    finally:
        # Clean up only if the report file was actually created.
        if report_accession is not None:
            files_util.unlink_file(report_accession, created_folder)
def upload_files(connection, files, folder_name):
    """
    Load files as raw data and gather them in a newly created folder.

    A folder is always created inside the special ``UPLOADED`` folder. It is
    named ``folder_name`` or, if that is empty,
    ``Upload <day>.<month>.<year> <time>`` built from the current local time.

    Each item of ``files`` is passed to ``DataImporter.load_raw``; the
    resulting accession is linked into the new folder and unlinked from the
    ``UPLOADED`` folder.

    :param connection: connection handed to ``DataImporter`` and ``FilesUtil``
    :param files: iterable of files to load as raw data
    :param folder_name: name for the new folder (may be empty)
    :return: ``(new_folder, folder_name, accession_file_map)`` where
        ``new_folder`` is the value returned by ``FilesUtil.create_folder``
        and the map goes from each loaded accession to the item of ``files``
        it was created from
    :rtype: tuple
    """
    raw_importer = DataImporter(connection)
    files_util = FilesUtil(connection)
    uploaded_folder = files_util.get_special_folder(SpecialFolders.UPLOADED)

    if not folder_name:
        # No name requested: fall back to a timestamped default.
        folder_name = datetime.now().strftime('Upload %d.%m.%y %H:%M:%S')

    target_folder = files_util.create_folder(
        folder_name,
        parent=uploaded_folder,
        description='Files uploaded by genestack-uploader')

    loaded = {}
    for source_path in files:
        raw_accession = raw_importer.load_raw(source_path)
        # Move the raw file: attach to the target first, then detach from
        # the default upload location.
        files_util.link_file(raw_accession, target_folder)
        files_util.unlink_file(raw_accession, uploaded_folder)
        loaded[raw_accession] = source_path

    return target_folder, folder_name, loaded
def test_metainfo_io(conn):
    """
    Round-trip metainfo values through a report file and compare them.

    Builds a ``Metainfo`` with boolean, file reference, date/time, integer,
    two decimals under one key, string, memory size, person, publication and
    name values, creates a report file carrying it in the special ``CREATED``
    folder, reads the metainfo back with ``FilesUtil.collect_metainfos`` and
    asserts each stored value equals the written one. The report file is
    unlinked from the ``CREATED`` folder in all cases once it exists.

    :param conn: connection handed to ``DataImporter`` and ``FilesUtil``
    """
    report_creator = DataImporter(conn)
    util = FilesUtil(conn)
    parent_folder = util.get_special_folder(SpecialFolders.CREATED)

    written = Metainfo()
    written.add_boolean("a", True)
    written.add_file_reference("b", parent_folder)
    written.add_date_time("c", "2015-12-13")
    written.add_integer("d", 239)
    written.add_decimal("e", 238.583)
    written.add_decimal("e", -13.4)
    written.add_string("f", "hello")
    written.add_memory_size("g", 2847633)
    written.add_person(
        "i", "Rosalind Franklin", "+1-202-555-0123", "*****@*****.**")
    written.add_publication(
        "j", "My Publication", "Myself", "Journal of Me", "23/12/2014",
        pages="12-23")
    written.add_value(Metainfo.NAME, StringValue("Test report file"))

    person = {'name': 'Rosalind Franklin',
              'phone': '+1-202-555-0123',
              'email': '*****@*****.**'}
    publication = {'title': 'My Publication',
                   'authors': 'Myself',
                   'journalName': 'Journal of Me',
                   'issueDate': '23/12/2014',
                   'pages': '12-23',
                   'issueNumber': None,
                   'identifiers': {}}

    report = None
    try:
        report = report_creator.create_report_file(
            metainfo=written, urls=[TEST_URL], parent=parent_folder)
        read_back = next(iter(util.collect_metainfos([report])))

        # Each value is fetched right before its own assertion so a failure
        # stops at the same point as a plain chain of asserts would.
        flag = read_back.get('a')[0]
        assert flag.get_boolean()

        reference = read_back.get('b')[0]
        assert isinstance(reference.get_accession(), str)

        date_value = read_back.get('c')[0]
        assert date_value.get_date() == _strptime_local('2015-12-13', '%Y-%m-%d')

        integer = read_back.get('d')[0]
        assert integer.get_int() == 239

        first_decimal = read_back.get('e')[0]
        assert first_decimal.get_decimal() == 238.583

        second_decimal = read_back.get('e')[1]
        assert second_decimal.get_decimal() == -13.4

        text = read_back.get('f')[0]
        assert text.get_string() == "hello"

        memory_size = read_back.get('g')[0]
        assert memory_size.get_int() == 2847633

        person_value = read_back.get('i')[0]
        assert person_value.get_person() == person

        publication_value = read_back.get('j')[0]
        assert publication_value.get_publication() == publication

        name_value = read_back.get(Metainfo.NAME)[0]
        assert name_value.get_string() == "Test report file"

        link_value = read_back.get(BioMetaKeys.DATA_LINK)[0]
        assert link_value.get_url() == TEST_URL
    finally:
        # Nothing to clean up when creation itself failed.
        if report is not None:
            util.unlink_file(report, parent_folder)
'csv_file', help='Path to the local comma-delimited CSV file containing the data') parser.add_argument('--name', help='Name of the experiment to create in Genestack') parser.add_argument( '--description', help='Description of the experiment to display in Genestack') args = parser.parse_args() csv_input = args.csv_file print "Connecting to Genestack..." # get connection and application handlers connection = get_connection(args) importer = DataImporter(connection) # file format of the reads to import file_format = UnalignedReads.compose_format_map(UnalignedReads.Space.BASESPACE, UnalignedReads.Format.PHRED33, UnalignedReads.Type.SINGLE) # create the experiment where we will store the data in Genestack experiment = importer.create_experiment(name=args.name or "Imported experiment", description=args.description or "No description provided") print "Created a new experiment with accession %s..." % experiment # parse the CSV file
'cell line': BioMetainfo.CELL_LINE} # parse script arguments parser = make_connection_parser() parser.add_argument('csv_file', help='Path to the local comma-delimited CSV file containing the data') parser.add_argument('--name', help='Name of the experiment to create in Genestack') parser.add_argument('--description', help='Description of the experiment to display in Genestack') args = parser.parse_args() csv_input = args.csv_file print "Connecting to Genestack..." # get connection and application handlers connection = get_connection(args) importer = DataImporter(connection) # file format of the reads to import file_format = UnalignedReads.compose_format_map(UnalignedReads.Space.BASESPACE, UnalignedReads.Format.PHRED33, UnalignedReads.Type.SINGLE) # create the experiment where we will store the data in Genestack experiment = importer.create_experiment(name=args.name or "Imported experiment", description=args.description or "No description provided") print "Created a new experiment with accession %s..." % experiment # parse the CSV file with open(csv_input, 'r') as the_file: reader = csv.DictReader(the_file, delimiter=",")
ENCODE_URL_PATTERN = "https://www.encodeproject.org/files/{0}/@@download/{0}.fastq.gz" # parse script arguments parser = make_connection_parser() parser.add_argument('tsv_file', metavar='<tsv_file>', help='Path to the local tab-delimited file containing the data') args = parser.parse_args() tsv_input = args.tsv_file print('Connecting to Genestack...') # get connection and application handlers connection = get_connection(args) importer = DataImporter(connection) # create the experiment where we will store the data in Genestack experiment = importer.create_experiment(name="ENCODE Human RNA-seq", description="Human RNA-seq assays from ENCODE") print('Created a new experiment with accession %s...' % experiment) created_pairs = set() # parse the CSV file with open(tsv_input, 'r') as the_file: reader = csv.DictReader(the_file, dialect='excel_tab') field_names = reader.fieldnames for file_data in reader: