def main():
    import argparse
    parser = argparse.ArgumentParser(
        description=__doc__, epilog=EPILOG,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument('--key', default='default',
                        help="The keypair identifier from the keyfile for the server. Default is --key=default")
    parser.add_argument('--keyfile', default=os.path.expanduser("~/keypairs.json"),
                        help="The keypair file. Default is --keyfile=%s" % (os.path.expanduser("~/keypairs.json")))
    parser.add_argument('--infile', '-i',
                        help="CSV file with metadata to update")
    parser.add_argument('--dryrun', default=False, action='store_true',
                        help="Do everything except save changes")
    parser.add_argument('--debug', default=False, action='store_true',
                        help="Print debug messages. Default is False.")
    parser.add_argument('--put', default=False, action='store_true',
                        help="If a property in the input is blank, remove that property entirely from the existing object")
    args = parser.parse_args()

    if args.debug:
        logging.basicConfig(format='%(levelname)s:%(message)s', level=logging.DEBUG)
    else:
        logging.basicConfig(format='%(levelname)s:%(message)s', level=logging.INFO)
    logger = logging.getLogger(__name__)  # all per-object messages below go through this logger

    key = ENC_Key(args.keyfile, args.key)  # get the keypair
    connection = ENC_Connection(key)  # initialize the connection object
    # biosample_collection = ENC_Collection(connection, 'biosamples', frame='object')

    with open(args.infile, newline='') as f:  # newline='' lets the csv module handle line endings
        reader = csv.DictReader(f, delimiter=',', quotechar='"')
        for new_metadata in reader:
            uuid = new_metadata.pop('uuid', None)
            accession = new_metadata.pop('accession', None)
            if uuid:  # use the uuid if there is one
                obj_id = uuid
            elif accession:  # if no uuid then use the accession
                obj_id = accession
            else:  # if neither uuid nor accession, assume this is a new object
                obj_id = None
            enc_object = ENC_Item(connection, obj_id)
            # print("Got accessioned object %s with status %s" % (enc_object.get('accession'), enc_object.get('status')))
            submit_new_file = False
            for prop in new_metadata:
                if new_metadata[prop].strip() == "":
                    if args.put:  # if empty, pop the old property out of the object
                        old_value = enc_object.properties.pop(prop, None)
                    continue  # skip properties with no value for post or patch
                else:  # new property or new value for an old property
                    new_metadata_string = new_metadata[prop]
                    if ':' in prop:  # column headers may carry a type hint, e.g. read_length:int
                        prop_name, sep, prop_type = prop.partition(':')
                    else:
                        prop_name = prop
                        prop_type = 'string'
                    if prop_type == 'array':
                        # subreader = csv.reader(StringIO(new_metadata_string), delimiter=',', quotechar='"')
                        # array_items = []
                        # for line in subreader:
                        #     for s in line:
                        #         array_items.append(s)
                        logger.debug("new_metadata_string is %s" % (new_metadata_string))
                        array_items = json.loads(new_metadata_string)
                        logger.debug("array_items is %s" % (array_items))
                        json_obj = {prop_name: array_items}
                    elif prop_type in ('int', 'integer'):
                        json_obj = {prop_name: int(new_metadata_string)}
                    elif prop_type == 'float':
                        json_obj = {prop_name: float(new_metadata_string)}
                    else:  # default is string
                        json_obj = {prop_name: new_metadata_string}
                    if prop == 'submitted_file_name':
                        new_filename = new_metadata_string
                        # .get() avoids a KeyError when posting a brand-new object
                        old_filename = enc_object.properties.get('submitted_file_name')
                        if new_filename != old_filename:
                            submit_new_file = True
                    enc_object.properties.update(json_obj)
            if submit_new_file:
                path = os.path.expanduser(enc_object.get('submitted_file_name'))
                path = os.path.abspath(path)
                basename = os.path.basename(path)
                enc_object.properties.update({
                    'submitted_file_name': basename,
                    'md5sum': common.md5(path),
                    'file_size': os.path.getsize(path)})
            if obj_id:
                logger.info('Syncing %s' % (obj_id))
            else:
                logger.info('Syncing new object')
            logger.debug('%s' % (json.dumps(enc_object.properties, sort_keys=True, indent=4, separators=(',', ': '))))
            result = enc_object.sync(args.dryrun)
            if not args.dryrun:
                try:
                    assert result['status'] == 'success'
                except (AssertionError, KeyError, TypeError):  # the sync result may lack 'status' or not be a dict
                    logger.error('New object sync failed ... Skipping. %s' % (result))
                else:
                    new_object = result['@graph'][0]
                    if 'accession' in new_object:
                        new_id = new_object['accession']
                    else:
                        new_id = new_object['uuid']
                    logger.info("New object: %s" % (new_id))
                    # upload only when a new file was actually staged above;
                    # that is also the only branch on which `path` is defined
                    if enc_object.type == 'file' and submit_new_file:
                        upload_credentials = enc_object.new_creds()
                        logger.debug(upload_credentials)
                        rc = upload_file(upload_credentials, path)
                        logger.info("Upload rc: %d" % (rc))
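# A minimal sketch of the CSV this script expects (hypothetical column names
# and values, not from a real submission): the header row holds property
# names, optionally suffixed with ':type' to drive the coercion done by
# prop.partition(':') above; 'array' cells hold JSON, and a blank cell is
# skipped (or, with --put, removes that property from the existing object).
#
#   accession,read_length:int,flowcell_details:array,submitted_file_name
#   ENCFF000ABC,36,"[{""lane"": ""1""}]",fastq/reads.fastq.gz
#
# Assuming the script is meant to be run directly, the usual entry-point
# guard would follow:

if __name__ == '__main__':
    main()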
def main():
    import argparse
    parser = argparse.ArgumentParser(
        description=__doc__, epilog=EPILOG,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument('--key', default='default',
                        help="The keypair identifier from the keyfile for the server. Default is --key=default")
    parser.add_argument('--keyfile', default=os.path.expanduser("~/keypairs.json"),
                        help="The keypair file. Default is --keyfile=%s" % (os.path.expanduser("~/keypairs.json")))
    parser.add_argument('--infile', '-i',
                        help="CSV file with metadata to update")
    parser.add_argument('--dryrun', default=False, action='store_true',
                        help="Do everything except save changes")
    parser.add_argument('--debug', default=False, action='store_true',
                        help="Print debug messages. Default is False.")
    parser.add_argument('--put', default=False, action='store_true',
                        help="If a property in the input is blank, remove that property entirely from the existing object")
    args = parser.parse_args()

    if args.debug:
        logging.basicConfig(format='%(levelname)s:%(message)s', level=logging.DEBUG)
    else:
        logging.basicConfig(format='%(levelname)s:%(message)s', level=logging.INFO)
    logger = logging.getLogger(__name__)  # per-object status messages below go through this logger

    key = ENC_Key(args.keyfile, args.key)  # get the keypair
    connection = ENC_Connection(key)  # initialize the connection object
    # biosample_collection = ENC_Collection(connection, 'biosamples', frame='object')

    with open(args.infile, newline='') as f:  # newline='' lets the csv module handle line endings
        reader = csv.DictReader(f, delimiter=',', quotechar='"')
        for new_metadata in reader:
            uuid = new_metadata.pop('uuid', None)
            accession = new_metadata.pop('accession', None)
            if uuid:  # use the uuid if there is one
                obj_id = uuid
            elif accession:  # if no uuid then use the accession
                obj_id = accession
            else:  # if neither uuid nor accession, assume this is a new object
                obj_id = None
            enc_object = ENC_Item(connection, obj_id)
            # print("Got accessioned object %s with status %s" % (enc_object.get('accession'), enc_object.get('status')))
            for prop in new_metadata:
                if new_metadata[prop].strip() == "":
                    if args.put:  # if empty, pop the old property out of the object
                        old_value = enc_object.properties.pop(prop, None)
                    continue  # skip properties with no value for post or patch
                else:  # new property or new value for an old property
                    new_metadata_string = new_metadata[prop]
                    if ':' in prop:
                        prop_name, sep, prop_type = prop.partition(':')
                    else:
                        prop_name = prop
                        prop_type = 'string'
                    if prop_type == 'array':
                        # subreader = csv.reader(StringIO(new_metadata_string), delimiter=',', quotechar='"')
                        # array_items = []
                        # for line in subreader:
                        #     for s in line:
                        #         array_items.append(s)
                        print("new_metadata_string is %s" % (new_metadata_string))
                        array_items = json.loads(new_metadata_string)
                        print("array_items is %s" % (array_items))
                        json_obj = {prop_name: array_items}
                    elif prop_type in ('int', 'integer'):
                        json_obj = {prop_name: int(new_metadata_string)}
                    elif prop_type == 'float':
                        json_obj = {prop_name: float(new_metadata_string)}
                    else:  # default is string
                        json_obj = {prop_name: new_metadata_string}
                    enc_object.properties.update(json_obj)
            if 'submitted_file_name' in enc_object.properties:
                path = os.path.expanduser(enc_object.get('submitted_file_name'))
                path = os.path.abspath(path)
                basename = os.path.basename(path)
                enc_object.properties.update({
                    'submitted_file_name': basename,
                    'md5sum': common.md5(path),
                    'file_size': os.path.getsize(path)})
            if obj_id:
                logger.info('Syncing %s' % (obj_id))
            else:
                logger.info('Syncing new object')
            logger.debug('%s' % (json.dumps(enc_object.properties, sort_keys=True, indent=4, separators=(',', ': '))))
            if not args.dryrun:
                new_object = enc_object.sync()
                try:
                    new_accession = new_object['accession']
                except (KeyError, TypeError):  # not everything that syncs has an accession
                    pass
                else:
                    print("New accession: %s" % (new_accession))
                if enc_object.type == 'file' and 'submitted_file_name' in json_obj:
                    upload_credentials = enc_object.new_creds()
                    print(upload_credentials)
                    rc = upload_file(upload_credentials, path)
                    print("Upload rc: %d" % (rc))
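# For context, ENC_Key(args.keyfile, args.key) presumably reads a named
# keypair out of ~/keypairs.json, with --key selecting which top-level entry
# to use. A plausible layout (server URL and credentials are placeholders,
# not real values) would be:
#
#   {
#     "default": {
#       "server": "https://www.encodeproject.org",
#       "key": "XXXXXXXX",
#       "secret": "xxxxxxxxxxxxxxxx"
#     }
#   }
#
# Assuming direct invocation:

if __name__ == '__main__':
    main()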
def main():
    import argparse
    parser = argparse.ArgumentParser(
        description=__doc__, epilog=EPILOG,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument('--key', default='default',
                        help="The keypair identifier from the keyfile for the server. Default is --key=default")
    parser.add_argument('--keyfile', default=os.path.expanduser("~/keypairs.json"),
                        help="The keypair file. Default is --keyfile=%s" % (os.path.expanduser("~/keypairs.json")))
    parser.add_argument('--infile', '-i',
                        help="file with publications to make")
    parser.add_argument('--debug', default=False, action='store_true',
                        help="Print debug messages. Default is False.")
    args = parser.parse_args()

    if args.debug:
        logging.basicConfig(format='%(levelname)s:%(message)s', level=logging.DEBUG)
    else:
        logging.basicConfig(format='%(levelname)s:%(message)s', level=logging.INFO)

    key = ENC_Key(args.keyfile, args.key)  # get the keypair
    connection = ENC_Connection(key)  # initialize the connection object
    obj_id = None
    search_url = 'http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=pubmed&retmode=json&term='
    summary_url = 'http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi?db=pubmed&retmode=json&id='

    with open(args.infile) as f:
        for new_publication in f:
            # strip the trailing newline so the id is usable in a URL
            prefix, sep, pub_id = new_publication.strip().partition(':')
            # must make an additional search to get the PMID if a PMID was not supplied
            if prefix != 'PMID':  # compare by value, not identity
                search_response = requests.get(search_url + pub_id, headers=connection.headers)
                search_response_dict = search_response.json()
                if search_response_dict['esearchresult']['count'] == '0':
                    logging.warning('SEARCH failure. Response = %s' % (search_response.text))
                    continue
                pub_id = search_response_dict['esearchresult']['idlist'][0]
            # build the publication object
            publication_obj = {'@type': 'publication', 'status': 'published'}
            enc_object = ENC_Item(connection, obj_id)
            summary_response = requests.get(summary_url + pub_id, headers=connection.headers)
            summary_response_dict = summary_response.json()
            summary_publication = summary_response_dict['result'][pub_id]
            publication_obj['title'] = summary_publication['title']
            publication_obj['date_published'] = summary_publication['pubdate']
            publication_obj['journal'] = summary_publication['source']
            publication_obj['volume'] = summary_publication['volume']
            publication_obj['issue'] = summary_publication['issue']
            publication_obj['page'] = summary_publication['pages']
            authors = []
            for author in summary_publication['authors']:
                authors.append(author['name'])
            publication_obj['authors'] = ', '.join(authors)
            references = []
            for article_id in summary_publication['articleids']:
                if article_id['idtype'] == 'pubmed':
                    references.append('PMID:' + article_id['value'])
                elif article_id['idtype'] == 'pmc':
                    references.append('PMCID:' + article_id['value'])
                elif article_id['idtype'] == 'doi':
                    references.append('doi:' + article_id['value'])
            publication_obj['references'] = references  # will change to identifiers with script update
            enc_object.properties.update(publication_obj)
            logging.info('Syncing %s' % (pub_id))
            logging.debug('%s' % (json.dumps(enc_object.properties, sort_keys=True, indent=4, separators=(',', ': '))))
            response_dict = enc_object.sync()
            print(response_dict)
            if response_dict['status'] == 'success':
                posted_object = response_dict['@graph'][0]
                new_id = posted_object['@id']
                print("New ENCODE id number: %s" % (new_id))
                print(json.dumps(posted_object, sort_keys=True, indent=4, separators=(',', ': ')))
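# A minimal sketch of the input file this script expects: one publication
# identifier per line. A PMID prefix is used as-is; anything else is resolved
# to a PMID through the esearch query above. The ids below are placeholders,
# not real publications:
#
#   PMID:12345678
#   doi:10.1000/example.123
#
# Assuming direct invocation:

if __name__ == '__main__':
    main()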