def mark_obsolete(self, accession):
    """
    Flag a Genestack file as obsolete.

    The flag is stored by adding the boolean key ``genestack:obsolete``
    (set to ``True``) to the metainfo of the file.

    :param accession: accession of the file to flag
    :return: None
    """
    obsolete_flag = Metainfo()
    obsolete_flag.add_boolean('genestack:obsolete', True)
    self.add_metainfo_values(accession, obsolete_flag)
def mark_for_tests(self, app_file):
    """
    Flag a Genestack file as a test file.

    The flag is stored by adding the boolean key ``CALCULATE_CHECKSUMS_KEY``
    (set to ``True``) to the metainfo of the file. A test file calculates
    md5 checksums of its encapsulated physical files during initialization.

    :param app_file: accession of the file to flag
    :return: None
    """
    test_flag = Metainfo()
    test_flag.add_boolean(CALCULATE_CHECKSUMS_KEY, True)
    self.add_metainfo_values(app_file, test_flag)
def _fill_dataset_metainfo(dataset_metainfo, name):
    """
    Return dataset metainfo with the name field set to ``name``.

    A falsy ``dataset_metainfo`` (e.g. ``None``) is replaced by a fresh
    ``Metainfo``; otherwise the provided object itself is updated and
    returned.

    :param dataset_metainfo: metainfo to fill, or a falsy value to start
        from a new one
    :param name: value to store under ``Metainfo.NAME``
    :return: metainfo object containing the name
    :raises GenestackException: if the provided metainfo already has the
        ``Metainfo.NAME`` field
    """
    filled = dataset_metainfo if dataset_metainfo else Metainfo()
    name_already_set = Metainfo.NAME in filled
    if name_already_set:
        message = 'Provided metainfo must not have "%s" field set' % Metainfo.NAME
        raise GenestackException(message)
    filled.add_string(Metainfo.NAME, name)
    return filled
def add_checksums(self, app_file, expected_checksums):
    """
    Store expected MD5 checksums in the metainfo of a CLA file.

    Expected checksums are calculated in the following way:

        - The number of checksums equals number of entries in storage.
          For instance, a Reference Genome file has 2 entries
          (annotation and sequence files).
        - If there are multiple files in one entry, they will be
          concatenated in the same order as they were ``PUT`` to storage
          by the initialization script.
        - If a file is marked for testing, then after initialization its
          metainfo will contain both expected and actual checksum values.

    Each checksum is written as a string value under a key made of
    ``EXPECTED_CHECKSUM_PREFIX`` followed by the key of the checksum.

    :param app_file: accession of application file
    :param expected_checksums: mapping of key to MD5 checksum
        (iterated with ``.items()``)
    :return: None
    """
    checksum_metainfo = Metainfo()
    for entry_name, md5 in expected_checksums.items():
        checksum_key = '%s%s' % (EXPECTED_CHECKSUM_PREFIX, entry_name)
        checksum_metainfo.add_string(checksum_key, md5)
    self.add_metainfo_values(app_file, checksum_metainfo)
def collect_metainfos(self, accessions):
    """
    Fetch the complete metainfo of each file in a list.

    :param accessions: list of accessions
    :type accessions: list[str]
    :return: list of metainfo objects, one per item returned by the
        ``getMetainfo`` call
    :rtype: list[Metainfo]
    """
    raw_metainfos = self.invoke('getMetainfo', accessions)
    parsed = []
    for raw in raw_metainfos:
        parsed.append(Metainfo.parse_metainfo_from_dict(raw))
    return parsed
def create_folder(self, name, parent=None, description=None, metainfo=None):
    """
    Create a folder.

    :param name: name of the folder
    :type name: str
    :param parent: if not specified, create folder in the user's private
        folder
    :type parent: str
    :param description: description of the folder (goes into the metainfo)
    :type description: str
    :param metainfo: additional Metainfo. The name (and the description,
        when given) are added to it by this method, so they should be
        specified either via arguments or in the metainfo object (but not
        in both). A non-empty metainfo object is modified in place.
    :type metainfo: Metainfo
    :return: accession of created folder
    """
    folder_metainfo = metainfo if metainfo else Metainfo()
    folder_metainfo.add_string(Metainfo.NAME, name)
    has_description = description is not None
    if has_description:
        folder_metainfo.add_string(Metainfo.DESCRIPTION, description)
    accession = self.invoke('createFolder', parent, folder_metainfo)
    return accession
def _copy_metainfo(metainfo):
    """
    Return a new ``Metainfo`` built from ``metainfo``.

    A falsy argument (e.g. ``None``) yields an empty ``Metainfo``.

    :param metainfo: metainfo to copy, or a falsy value
    :return: new Metainfo object
    """
    if metainfo:
        return Metainfo(metainfo)
    return Metainfo()
# accessions of second mates that were already attached to a paired-end assay
created_pairs = set()

# parse the TSV file
with open(tsv_input, 'r') as the_file:
    # 'excel-tab' is the name under which the csv module registers the
    # tab-delimited dialect; 'excel_tab' is only the class name and is
    # rejected as an unknown dialect when passed as a string.
    reader = csv.DictReader(the_file, dialect='excel-tab')
    field_names = reader.fieldnames

    for file_data in reader:
        # skip the entry if the file was already included in a previously
        # created paired-end assay
        if file_data[FILE_ACCESSION] in created_pairs:
            continue

        # for each entry, prepare a Metainfo object; a falsy mapped key in
        # VALID_FIELDS means the column name is used as the metainfo key
        metainfo = Metainfo()
        for key, mapped_key in VALID_FIELDS.items():
            metainfo.add_string(mapped_key or key, file_data[key])
        metainfo.add_external_link(BioMetaKeys.READS_LINK,
                                   ENCODE_URL_PATTERN.format(file_data[FILE_ACCESSION]))

        paired_accession = file_data.get(PAIRED_ACCESSION)
        if paired_accession:
            # add accession and URL of the second mate if the reads are
            # paired-end (the accession value itself, not the column name)
            metainfo.add_string(FILE_ACCESSION, paired_accession)
            metainfo.add_external_link(BioMetaKeys.READS_LINK,
                                       ENCODE_URL_PATTERN.format(paired_accession))
            created_pairs.add(paired_accession)

        # create the sequencing assay on Genestack
        created_file = importer.create_sequencing_assay(experiment, metainfo=metainfo)
        print('Created file "%s" (%s)' % (file_data[FILE_ACCESSION], created_file))
# read the CSV file and create one sequencing assay per row
with open(csv_input, 'r') as the_file:
    reader = csv.DictReader(the_file, delimiter=",")
    field_names = reader.fieldnames

    # every mandatory key must be a column of the CSV file
    for mandatory_key in MANDATORY_KEYS:
        if mandatory_key in field_names:
            continue
        raise GenestackException("The key '%s' must be supplied in the CSV file" % mandatory_key)

    for file_data in reader:
        # build the Metainfo object describing this row
        metainfo = Metainfo()
        for key in field_names:
            cell = file_data[key]
            if key == "link":
                # the link is stored as an external link to the reads
                metainfo.add_external_link(key=BioMetaKeys.READS_LINK, text="link",
                                           url=cell, fmt=file_format)
            elif key == "organism":
                # the organism is stored under its dedicated metainfo key
                metainfo.add_string(BioMetaKeys.ORGANISM, cell)
            else:
                # any other column is stored as a string; SPECIAL_KEYS may
                # remap the (lower-cased) column name to another key
                metainfo.add_string(SPECIAL_KEYS.get(key.lower(), key), cell)

        # create the sequencing assay on Genestack
        created_file = importer.create_sequencing_assay(experiment, metainfo=metainfo)
if args.local_key not in field_names: raise GenestackException( "Error: the local key %s is not present in the supplied CSV file" % args.local_key) for file_data in reader: # find the corresponding file local_identifier = file_data[local_key] remote_file = identifier_map.get(local_identifier) if not remote_file: print('Warning: no match found for file name "%s"' % local_identifier) continue # prepare a Metainfo object metainfo = Metainfo() for key in field_names: # key parsing logic value = file_data[key] if value == "" or value is None: continue if key == args.local_key: continue if key == "organism": metainfo.add_string(BioMetaKeys.ORGANISM, value) else: metainfo_key = SPECIAL_KEYS.get(key.lower(), key) if parse_as_boolean(value) is not None: metainfo.add_boolean(metainfo_key, parse_as_boolean(value)) else:
def test_metainfo_io(conn):
    """
    Check that metainfo values survive a round trip through the server.

    A report file is created with one value of each kind, its metainfo is
    read back and compared, and the file is unlinked afterwards.
    """
    importer = DataImporter(conn)
    files_util = FilesUtil(conn)
    created_folder = files_util.get_special_folder(SpecialFolders.CREATED)

    # metainfo sent to the server (key "e" holds two decimal values)
    sent = Metainfo()
    sent.add_boolean("a", True)
    sent.add_file_reference("b", created_folder)
    sent.add_date_time("c", "2015-12-13")
    sent.add_integer("d", 239)
    sent.add_decimal("e", 238.583)
    sent.add_decimal("e", -13.4)
    sent.add_string("f", "hello")
    sent.add_memory_size("g", 2847633)
    sent.add_person("i", "Rosalind Franklin", "+1-202-555-0123", "*****@*****.**")
    sent.add_publication("j", "My Publication", "Myself", "Journal of Me",
                         "23/12/2014", pages="12-23")
    sent.add_value(Metainfo.NAME, StringValue("Test report file"))

    # values expected back for the structured kinds
    expected_person = {
        'name': 'Rosalind Franklin',
        'phone': '+1-202-555-0123',
        'email': '*****@*****.**',
    }
    expected_publication = {
        'title': 'My Publication',
        'authors': 'Myself',
        'journalName': 'Journal of Me',
        'issueDate': '23/12/2014',
        'pages': '12-23',
        'issueNumber': None,
        'identifiers': {},
    }
    expected_date = _strptime_local('2015-12-13', '%Y-%m-%d')

    report_file = None
    try:
        report_file = importer.create_report_file(metainfo=sent,
                                                  urls=[TEST_URL],
                                                  parent=created_folder)
        received = next(iter(files_util.collect_metainfos([report_file])))

        assert received.get('a')[0].get_boolean()
        assert isinstance(received.get('b')[0].get_accession(), str)
        assert received.get('c')[0].get_date() == expected_date
        assert received.get('d')[0].get_int() == 239
        assert received.get('e')[0].get_decimal() == 238.583
        assert received.get('e')[1].get_decimal() == -13.4
        assert received.get('f')[0].get_string() == "hello"
        assert received.get('g')[0].get_int() == 2847633
        assert received.get('i')[0].get_person() == expected_person
        assert received.get('j')[0].get_publication() == expected_publication
        assert received.get(Metainfo.NAME)[0].get_string() == "Test report file"
        assert received.get(BioMetaKeys.DATA_LINK)[0].get_url() == TEST_URL
    finally:
        # remove the report file even when an assertion fails
        if report_file is not None:
            files_util.unlink_file(report_file, created_folder)
def test_metainfo_io(conn):
    """
    Write a metainfo with every kind of value and verify what is read back.

    The report file created for the check is unlinked from the "created"
    folder at the end, whether or not the assertions pass.
    """
    report_importer = DataImporter(conn)
    util = FilesUtil(conn)
    parent_folder = util.get_special_folder(SpecialFolders.CREATED)

    outgoing = Metainfo()
    outgoing.add_boolean("a", True)
    outgoing.add_file_reference("b", parent_folder)
    outgoing.add_date_time("c", "2015-12-13")
    outgoing.add_integer("d", 239)
    for decimal in (238.583, -13.4):
        # both decimals are stored under the same key, in this order
        outgoing.add_decimal("e", decimal)
    outgoing.add_string("f", "hello")
    outgoing.add_memory_size("g", 2847633)
    outgoing.add_person("i", "Rosalind Franklin", "+1-202-555-0123", "*****@*****.**")
    outgoing.add_publication("j", "My Publication", "Myself", "Journal of Me", "23/12/2014", pages="12-23")
    outgoing.add_value(Metainfo.NAME, StringValue("Test report file"))

    report_file = None
    try:
        report_file = report_importer.create_report_file(metainfo=outgoing, urls=[TEST_URL], parent=parent_folder)
        incoming = next(iter(util.collect_metainfos([report_file])))

        def value_of(key, index=0):
            # value stored at position ``index`` under ``key`` in the fetched metainfo
            return incoming.get(key)[index]

        assert value_of('a').get_boolean()
        assert isinstance(value_of('b').get_accession(), str)
        assert value_of('c').get_date() == _strptime_local('2015-12-13', '%Y-%m-%d')
        assert value_of('d').get_int() == 239
        assert value_of('e').get_decimal() == 238.583
        assert value_of('e', 1).get_decimal() == -13.4
        assert value_of('f').get_string() == "hello"
        assert value_of('g').get_int() == 2847633
        assert value_of('i').get_person() == {
            'name': 'Rosalind Franklin',
            'phone': '+1-202-555-0123',
            'email': '*****@*****.**',
        }
        assert value_of('j').get_publication() == {
            'title': 'My Publication',
            'authors': 'Myself',
            'journalName': 'Journal of Me',
            'issueDate': '23/12/2014',
            'pages': '12-23',
            'issueNumber': None,
            'identifiers': {},
        }
        assert value_of(Metainfo.NAME).get_string() == "Test report file"
        assert value_of(BioMetaKeys.DATA_LINK).get_url() == TEST_URL
    finally:
        if report_file is not None:
            util.unlink_file(report_file, parent_folder)
def metainfo():
    """Return a new, empty ``Metainfo`` object."""
    empty_metainfo = Metainfo()
    return empty_metainfo
# read the CSV rows; the column named by args.local_key identifies the file
reader = csv.DictReader(the_file, delimiter=",")
field_names = reader.fieldnames

if args.local_key not in field_names:
    raise GenestackException("Error: the local key %s is not present in the supplied CSV file" % args.local_key)

for file_data in reader:
    # find the corresponding file
    local_identifier = file_data[local_key]
    remote_file = identifier_map.get(local_identifier)
    if not remote_file:
        print('Warning: no match found for file name "%s"' % local_identifier)
        continue

    # prepare a Metainfo object
    metainfo = Metainfo()
    for key in field_names:
        value = file_data[key]
        # empty cells and the identifier column itself carry no metainfo
        if value is None or value == "" or key == args.local_key:
            continue

        if key == "organism":
            metainfo.add_string(BioMetaKeys.ORGANISM, value)
            continue

        # SPECIAL_KEYS may remap the (lower-cased) column name to another key
        metainfo_key = SPECIAL_KEYS.get(key.lower(), key)
        # parse once and reuse the result: None means "not a boolean"
        boolean_value = parse_as_boolean(value)
        if boolean_value is not None:
            metainfo.add_boolean(metainfo_key, boolean_value)
        else:
            metainfo.add_string(metainfo_key, value)