Beispiel #1
0
def main():
    """Create or update ENCODE objects from the rows of a CSV file.

    Command-line options are read from ``sys.argv``:

    * ``--key`` / ``--keyfile``: keypair identifier and keypair file used to
      build the ``ENC_Key`` / ``ENC_Connection``.
    * ``--infile`` / ``-i``: CSV file with one object per row (required).
    * ``--dryrun``: passed through to ``ENC_Item.sync``; nothing after the
      sync (id report, upload) is performed.
    * ``--debug``: log at DEBUG instead of INFO.
    * ``--put``: a blank cell removes that property from the object instead
      of leaving it untouched.

    CSV conventions handled here:

    * The ``uuid`` column (preferred) or ``accession`` column identifies an
      existing object; when both are empty the row is treated as a new
      object.
    * Every other column is a property.  A header of the form ``name:type``
      converts the cell: ``array`` is parsed as JSON, ``int``/``integer`` and
      ``float`` use the corresponding constructor, anything else is kept as
      a string.
    * When ``submitted_file_name`` differs from the value already on the
      object, ``md5sum`` and ``file_size`` are computed from the local file,
      the property is reduced to the basename, and (for objects whose
      ``type`` is ``'file'``) the file is uploaded after a successful sync.

    Raises:
        ValueError: a cell cannot be converted to the type named in its
            header (from ``int``, ``float`` or ``json.loads``).
        OSError: the input file or a file to be hashed cannot be read.
    """
    import argparse

    parser = argparse.ArgumentParser(
        description=__doc__,
        epilog=EPILOG,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument(
        '--key',
        default='default',
        help="The keypair identifier from the keyfile for the server.  "
        "Default is --key=default",
    )
    parser.add_argument(
        '--keyfile',
        default=os.path.expanduser("~/keypairs.json"),
        help="The keypair file.  Default is --keyfile=%s" %
        (os.path.expanduser("~/keypairs.json")),
    )
    # Required: without it the script used to die inside open(None).
    parser.add_argument(
        '--infile',
        '-i',
        required=True,
        help="CSV file with metadata to update",
    )
    parser.add_argument(
        '--dryrun',
        default=False,
        action='store_true',
        help="Do everything except save changes",
    )
    parser.add_argument(
        '--debug',
        default=False,
        action='store_true',
        help="Print debug messages.  Default is False.",
    )
    parser.add_argument(
        '--put',
        default=False,
        action='store_true',
        help="If property in the input is blank, remove that property "
        "entirely from the existing object",
    )

    args = parser.parse_args()

    logging.basicConfig(
        format='%(levelname)s:%(message)s',
        level=logging.DEBUG if args.debug else logging.INFO,
    )

    key = ENC_Key(args.keyfile, args.key)  # get the keypair
    connection = ENC_Connection(key)  # initialize the connection object

    # The 'U' mode flag was removed in Python 3.11; newline='' is what the
    # csv module expects so that it can handle line endings (including ones
    # embedded in quoted fields) itself.
    with open(args.infile, newline='') as f:
        reader = csv.DictReader(f, delimiter=',', quotechar='"')
        for new_metadata in reader:
            uuid = new_metadata.pop('uuid', None)
            accession = new_metadata.pop('accession', None)
            # uuid wins over accession; neither means "new object".
            obj_id = uuid or accession or None
            enc_object = ENC_Item(connection, obj_id)

            # Per-row state.  Reset on every row so that nothing leaks from
            # a previous row into this row's upload decision.
            submit_new_file = False
            upload_path = None

            for prop, raw_value in new_metadata.items():
                if prop is None:
                    # DictReader files surplus cells (more cells than
                    # headers) under the key None; they have no name.
                    continue
                prop_name, _, prop_type = prop.partition(':')

                # Short rows yield None for the missing cells; treat those
                # exactly like blank cells.
                if raw_value is None or raw_value.strip() == "":
                    if args.put:
                        # Remove by property name, not by the raw header,
                        # so "name:type" columns are removed too.
                        enc_object.properties.pop(prop_name, None)
                    continue  # blank values are never posted or patched

                if prop_type == 'array':
                    logger.debug("new_metadata_string is %s", raw_value)
                    value = json.loads(raw_value)
                    logger.debug("array_items is %s", value)
                elif prop_type in ('int', 'integer'):
                    value = int(raw_value)
                elif prop_type == 'float':
                    value = float(raw_value)
                else:
                    value = raw_value  # default is string

                if prop_name == 'submitted_file_name':
                    # .get(): a new object has no previous file name, which
                    # counts as "changed".
                    old_filename = enc_object.properties.get(
                        'submitted_file_name')
                    if raw_value != old_filename:
                        submit_new_file = True

                enc_object.properties.update({prop_name: value})

            if submit_new_file:
                upload_path = os.path.abspath(
                    os.path.expanduser(
                        enc_object.get('submitted_file_name')))
                enc_object.properties.update({
                    'submitted_file_name': os.path.basename(upload_path),
                    'md5sum': common.md5(upload_path),
                    'file_size': os.path.getsize(upload_path),
                })

            if obj_id:
                logger.info('Syncing %s', obj_id)
            else:
                logger.info('Syncing new object')
            logger.debug('%s', json.dumps(enc_object.properties,
                                          sort_keys=True,
                                          indent=4,
                                          separators=(',', ': ')))

            result = enc_object.sync(args.dryrun)
            if args.dryrun:
                continue

            # Explicit check instead of assert-in-bare-except: asserts are
            # stripped under "python -O" and a bare except hides everything.
            try:
                succeeded = result['status'] == 'success'
            except (KeyError, TypeError):
                succeeded = False
            if not succeeded:
                logger.error('New object sync failed ... Skipping. %s',
                             result)
                continue

            new_object = result['@graph'][0]
            if 'accession' in new_object:
                new_id = new_object['accession']
            else:
                new_id = new_object['uuid']
            logger.info("New object: %s", new_id)

            # Upload only when this row actually supplied a new file; that
            # is the only case in which a local path is known.
            if enc_object.type == 'file' and upload_path is not None:
                upload_credentials = enc_object.new_creds()
                logger.debug(upload_credentials)
                rc = upload_file(upload_credentials, upload_path)
                logger.info("Upload rc: %d" % (rc))
def main():
    """Create or update ENCODE objects from the rows of a CSV file.

    Command-line options are read from ``sys.argv``:

    * ``--key`` / ``--keyfile``: keypair identifier and keypair file used to
      build the ``ENC_Key`` / ``ENC_Connection``.
    * ``--infile`` / ``-i``: CSV file with one object per row (required).
    * ``--dryrun``: build and log every object but never call ``sync``.
    * ``--debug``: log at DEBUG instead of INFO.
    * ``--put``: a blank cell removes that property from the object instead
      of leaving it untouched.

    CSV conventions handled here:

    * The ``uuid`` column (preferred) or ``accession`` column identifies an
      existing object; when both are empty the row is treated as a new
      object.
    * Every other column is a property.  A header of the form ``name:type``
      converts the cell: ``array`` is parsed as JSON, ``int``/``integer`` and
      ``float`` use the corresponding constructor, anything else is kept as
      a string.
    * When the row supplies ``submitted_file_name``, ``md5sum`` and
      ``file_size`` are computed from that local file and the property is
      reduced to its basename.  If the synced object reports an accession
      and its ``type`` is ``'file'``, the file is then uploaded.

    Raises:
        ValueError: a cell cannot be converted to the type named in its
            header (from ``int``, ``float`` or ``json.loads``).
        OSError: the input file or a file to be hashed cannot be read.
    """
    import argparse

    parser = argparse.ArgumentParser(
        description=__doc__, epilog=EPILOG,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument('--key', default='default',
                        help="The keypair identifier from the keyfile for the server.  Default is --key=default")

    parser.add_argument('--keyfile', default=os.path.expanduser("~/keypairs.json"),
                        help="The keypair file.  Default is --keyfile=%s" % (os.path.expanduser("~/keypairs.json")))

    # Required: without it the script used to die inside open(None).
    parser.add_argument('--infile', '-i', required=True,
                        help="CSV file with metadata to update")

    parser.add_argument('--dryrun', default=False, action='store_true',
                        help="Do everything except save changes")

    parser.add_argument('--debug', default=False, action='store_true',
                        help="Print debug messages.  Default is False.")

    parser.add_argument('--put', default=False, action='store_true',
                        help="If property in the input is blank, remove that property entirely from the existing object")

    args = parser.parse_args()

    log_level = logging.DEBUG if args.debug else logging.INFO
    logging.basicConfig(format='%(levelname)s:%(message)s', level=log_level)

    key = ENC_Key(args.keyfile, args.key)  # get the keypair
    connection = ENC_Connection(key)  # initialize the connection object

    # The 'U' mode flag was removed in Python 3.11; newline='' is what the
    # csv module expects so that it can handle line endings itself.
    with open(args.infile, newline='') as f:
        reader = csv.DictReader(f, delimiter=',', quotechar='"')
        for new_metadata in reader:
            uuid = new_metadata.pop('uuid', None)
            accession = new_metadata.pop('accession', None)
            # uuid wins over accession; neither means "new object".
            obj_id = uuid or accession or None
            enc_object = ENC_Item(connection, obj_id)

            # Per-row state, reset for every row so the upload decision can
            # never be based on values left over from an earlier row.
            file_name_supplied = False
            path = None

            for prop, raw_value in new_metadata.items():
                if prop is None:
                    # DictReader files surplus cells (more cells than
                    # headers) under the key None; they have no name.
                    continue
                prop_name, _, prop_type = prop.partition(':')

                # Short rows yield None for the missing cells; treat those
                # exactly like blank cells.
                if raw_value is None or raw_value.strip() == "":
                    if args.put:
                        # Remove by property name, not by the raw header,
                        # so "name:type" columns are removed too.
                        enc_object.properties.pop(prop_name, None)
                    continue  # blank values are never posted or patched

                if prop_type == 'array':
                    print("new_metadata_string is %s" % (raw_value))
                    value = json.loads(raw_value)
                    print("array_items is %s" % (value,))
                elif prop_type in ('int', 'integer'):
                    value = int(raw_value)
                elif prop_type == 'float':
                    value = float(raw_value)
                else:
                    value = raw_value  # default is string

                if prop_name == 'submitted_file_name':
                    file_name_supplied = True
                enc_object.properties.update({prop_name: value})

            # Hash only a file named by this row.  A name that merely
            # pre-exists on the object is a bare basename with no local
            # path behind it, so hashing it would fail or hash the wrong
            # file.
            if file_name_supplied:
                path = os.path.abspath(
                    os.path.expanduser(enc_object.get('submitted_file_name')))
                enc_object.properties.update({
                    'submitted_file_name': os.path.basename(path),
                    'md5sum': common.md5(path),
                    'file_size': os.path.getsize(path)})

            if obj_id:
                logger.info('Syncing %s', obj_id)
            else:
                logger.info('Syncing new object')
            logger.debug('%s', json.dumps(enc_object.properties, sort_keys=True, indent=4, separators=(',', ': ')))

            if args.dryrun:
                continue

            new_object = enc_object.sync()
            try:
                new_accession = new_object['accession']
            except (KeyError, TypeError):
                # Best effort, as before: a response without an accession
                # is not reported and triggers no upload - but it is no
                # longer hidden behind a bare "except: pass".
                logger.debug('Sync response has no accession: %s', new_object)
                continue

            print("New accession: %s" % (new_accession))
            if enc_object.type == 'file' and file_name_supplied:
                upload_credentials = enc_object.new_creds()
                print(upload_credentials)
                rc = upload_file(upload_credentials, path)
                print("Upload rc: %d" % (rc))
def main():
    """Create ENCODE publication objects from a file of publication ids.

    Command-line options are read from ``sys.argv``:

    * ``--key`` / ``--keyfile``: keypair identifier and keypair file used to
      build the ``ENC_Key`` / ``ENC_Connection``.
    * ``--infile`` / ``-i``: text file with one ``PREFIX:identifier`` entry
      per line (required).  Blank lines are ignored.
    * ``--debug``: log at DEBUG instead of INFO.

    For every line, the part after the first ``:`` is the identifier.  When
    the prefix is not ``PMID`` the identifier is first resolved to a PubMed
    id through the NCBI ``esearch`` endpoint (the line is skipped with a
    warning if nothing is found).  The PubMed ``esummary`` record is then
    mapped onto a new ``publication`` object, which is synced to the server
    and printed on success.

    Raises:
        KeyError: the esummary response lacks the requested id or one of
            the fields copied into the publication object.
        OSError: the input file cannot be read.
    """
    import argparse

    parser = argparse.ArgumentParser(
        description=__doc__, epilog=EPILOG,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument('--key',
                        default='default',
                        help="The keypair identifier from the keyfile for the server.  Default is --key=default")
    parser.add_argument('--keyfile',
                        default=os.path.expanduser("~/keypairs.json"),
                        help="The keypair file.  Default is --keyfile=%s" % (os.path.expanduser("~/keypairs.json")))
    # Required: without it the script used to die inside open(None).
    parser.add_argument('--infile', '-i',
                        required=True,
                        help="file with publications to make")
    parser.add_argument('--debug',
                        default=False,
                        action='store_true',
                        help="Print debug messages.  Default is False.")

    args = parser.parse_args()

    log_level = logging.DEBUG if args.debug else logging.INFO
    logging.basicConfig(format='%(levelname)s:%(message)s', level=log_level)

    key = ENC_Key(args.keyfile, args.key)  # get the keypair
    connection = ENC_Connection(key)  # initialize the connection object
    obj_id = None  # every publication is created as a new object

    # NCBI serves the E-utilities over HTTPS.
    search_url = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=pubmed&retmode=json&term='
    summary_url = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi?db=pubmed&retmode=json&id='

    # esummary "idtype" -> prefix used in the publication's references.
    reference_prefixes = {'pubmed': 'PMID:', 'pmc': 'PMCID:', 'doi': 'doi:'}

    # Read everything up front so the file is closed (even on errors)
    # before any network traffic starts.  splitlines() drops the line
    # terminators, whatever newline convention the file uses.
    with open(args.infile) as f:
        entries = f.read().splitlines()

    for new_publication in entries:
        new_publication = new_publication.strip()
        if not new_publication:
            continue  # ignore blank lines

        prefix, _, pub_id = new_publication.partition(':')
        prefix = prefix.strip()
        pub_id = pub_id.strip()

        # must make additional search to get PMID if PMID not supplied
        # (value comparison: "is not" tested object identity and was
        # effectively always true)
        if prefix != 'PMID':
            search_response = requests.get(search_url + pub_id, headers=connection.headers)
            search_response_dict = search_response.json()
            if search_response_dict['esearchresult']['count'] == '0':
                logging.warning('SEARCH failure. Response = %s' % (search_response.text))
                continue
            pub_id = search_response_dict['esearchresult']['idlist'][0]

        # build publication object
        publication_obj = {'@type': 'publication', 'status': 'published'}
        enc_object = ENC_Item(connection, obj_id)
        summary_response = requests.get(summary_url + pub_id, headers=connection.headers)
        summary_response_dict = summary_response.json()
        summary_publication = summary_response_dict['result'][pub_id]
        publication_obj['title'] = summary_publication['title']
        publication_obj['date_published'] = summary_publication['pubdate']
        publication_obj['journal'] = summary_publication['source']
        publication_obj['volume'] = summary_publication['volume']
        publication_obj['issue'] = summary_publication['issue']
        publication_obj['page'] = summary_publication['pages']
        publication_obj['authors'] = ', '.join(
            author['name'] for author in summary_publication['authors'])
        # Only pubmed / pmc / doi ids are carried over; other id types are
        # dropped.  (will change to identifiers with script update)
        publication_obj['references'] = [
            reference_prefixes[article_id['idtype']] + article_id['value']
            for article_id in summary_publication['articleids']
            if article_id['idtype'] in reference_prefixes
        ]
        enc_object.properties.update(publication_obj)

        logging.info('Syncing %s' % (pub_id))
        logging.debug('%s' % (json.dumps(enc_object.properties, sort_keys=True, indent=4, separators=(',', ': '))))
        response_dict = enc_object.sync()
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(response_dict)
        if response_dict['status'] == 'success':
            posted_object = response_dict['@graph'][0]
            new_id = posted_object['@id']
            print("New ENCODE id number: %s" % (new_id))
            print(json.dumps(posted_object, sort_keys=True, indent=4, separators=(',', ': ')))