def check_ENCODE(self, idList, connection, otherIdList=None, bothDicts=None):
    """Check each PMID against ENCODE and compare, patch or create as configured.

    For every PMID in ``idList`` a publication search is issued.  When the
    search returns a non-empty ``@graph`` the first hit is compared against
    Entrez (unless ``self.CREATE_ONLY`` is set).  Otherwise each identifier in
    ``otherIdList`` is searched and matched by title; a match is compared and,
    when ``self.UPDATE`` is set, patched so its identifiers include the PMID.
    If nothing matches and ``self.CREATE`` is set, a new publication is POSTed.

    Args:
        idList: iterable of PMID strings to check.
        connection: connection object handed through to ``encodedcc`` calls.
        otherIdList: optional alternative identifiers to search by title.
        bothDicts: optional mapping of PMID to extra data (reads
            ``categories``, ``published_by`` and ``data_used``).
    """
    # None sentinels instead of mutable defaults shared across calls.
    otherIdList = [] if otherIdList is None else otherIdList
    bothDicts = {} if bothDicts is None else bothDicts

    for pmid in idList:
        extraData = bothDicts.get(pmid)
        ENCODEvalue = encodedcc.get_ENCODE(
            "/search/?type=publication&searchTerm=PMID:" + pmid, connection)
        graph = ENCODEvalue.get("@graph")
        if graph:
            logger.info("PMID %s is listed in ENCODE", pmid)
            uuid = graph[0].get("uuid")
            if not self.CREATE_ONLY:
                self.compare_entrez_ENCODE(uuid, pmid, connection, extraData)
            continue

        if self.CREATE_ONLY:
            self.get_entrez([pmid])
        titleEntrez = self.entrezDict[pmid].get("title")
        found = False
        for otherID in otherIdList:
            titleENCODE = encodedcc.get_ENCODE(
                "/search/?type=publication&searchTerm=" + otherID, connection)
            if titleENCODE.get("title") != titleEntrez:
                continue
            other_uuid = titleENCODE.get("uuid")
            logger.warning("%s is in ENCODE by a different name %s", pmid, other_uuid)
            self.compare_entrez_ENCODE(other_uuid, pmid, connection, extraData)
            if self.UPDATE:
                # "identifiers" may be absent; start from an empty list then.
                newIdent = list(titleENCODE.get("identifiers") or [])
                newIdent.append("PMID:" + pmid)
                encodedcc.patch_ENCODE(other_uuid, connection, {"identifiers": newIdent})
            found = True

        if found:
            continue

        logger.warning("This publication is not listed in ENCODE %s", pmid)
        if not self.CREATE:
            continue

        self.POST_COUNT += 1
        pmidData = self.entrezDict[pmid]
        # bothDicts may have no entry for this PMID; fall back to an empty
        # mapping so building the payload does not raise AttributeError.
        extra = extraData or {}
        logger.info("POSTing the new object: %s", pmid)
        post_dict = {
            "title": pmidData.get("title"),
            "abstract": pmidData.get("abstract"),
            "submitted_by": "/users/8b1f8780-b5d6-4fb7-a5a2-ddcec9054288/",
            "lab": "/labs/encode-consortium/",
            "award": "/awards/ENCODE/",
            "categories": extra.get("categories"),
            "published_by": extra.get("published_by"),
            "date_published": pmidData.get("date_published"),
            "authors": pmidData.get("authors"),
            "identifiers": ["PMID:" + pmid],
            "journal": pmidData.get("journal"),
            "volume": pmidData.get("volume"),
            "issue": pmidData.get("issue"),
            "page": pmidData.get("page"),
            "status": "published",
        }
        if extra.get("data_used"):
            post_dict["data_used"] = extra.get("data_used")
        encodedcc.new_ENCODE(connection, "publications", post_dict)
def file_manager(key, value, connection, obj_type):
    """Download a protocol file, post it as an ENCODE Document and link it.

    The file at URL ``key`` is downloaded into the working directory, turned
    into an attachment, and POSTed as a ``Document`` under the alias
    ``brenton-graveley:<last two URL path segments joined by "_">``.  If an
    object with that alias already exists it is first patched to status
    ``deleted`` with its aliases cleared.  The object ``value`` is then
    patched to reference the new document.

    Args:
        key: URL of the file to download.
        value: identifier of the object to patch with the document.
        connection: connection object handed through to ``encodedcc`` calls.
        obj_type: ``"Biosample"`` patches ``protocol_documents`` and takes the
            description part from before the first ``-`` in the file name;
            anything else patches ``documents`` and takes the part after it.

    Raises:
        requests.HTTPError: if the download returns an error status.
        IndexError: if a non-Biosample file name contains no ``-``.
    """
    import os  # local import keeps this block self-contained

    filename = key.split("/")[-1]
    print("Downloading {}".format(filename))
    r = requests.get(key)
    # Fail loudly rather than uploading an HTTP error page as the protocol.
    r.raise_for_status()
    with open(filename, "wb") as outfile:
        outfile.write(r.content)

    try:
        name_parts = filename.split("-")
        filepart = name_parts[0] if obj_type == "Biosample" else name_parts[1]
        attach = attachment(filename)
        temp = "_".join(key.split("/")[-2:])
        aliases = ["brenton-graveley:" + temp]
        alias_path = quote(aliases[0])

        if encodedcc.get_ENCODE(alias_path, connection)['status'] != 'error':
            # Retire the existing document so the alias can be reused.
            removing_patch = {'status': 'deleted', 'aliases': []}
            print('DELETING ' + aliases[0] + ' ' + str(removing_patch))
            encodedcc.patch_ENCODE(alias_path, connection, removing_patch)

        upload = {
            "aliases": aliases,
            "attachment": attach,
            "award": "U54HG007005",
            "document_type": "general protocol",
            "lab": "/labs/brenton-graveley/",
            "status": "released",
            "description": "{obj_type} protocol for {filepart} shRNA followed by RNA-seq".format(
                obj_type=obj_type, filepart=filepart),
        }
        print("Uploading {} as {}".format(filename, aliases[0]))
        encodedcc.new_ENCODE(connection, "Document", upload)

        print("Patching {} with document {}".format(value, aliases[0]))
        if obj_type == "Biosample":
            docs = {"protocol_documents": aliases}
        else:
            docs = {"documents": aliases}
        encodedcc.patch_ENCODE(quote(value), connection, docs)
    finally:
        # Always clean up the download, even when an ENCODE call fails.
        print("Removing document {}".format(filename))
        try:
            os.remove(filename)
        except OSError as err:
            print("Could not remove {}: {}".format(filename, err))
def excel_reader(datafile, sheet, update, connection, patchall):
    """POST or PATCH every data row of one spreadsheet sheet and print a tally.

    The first row from ``reader`` supplies the property names.  Each later
    row becomes a JSON object (via ``dict_patcher``) that is looked up on the
    server by the first non-empty of ``uuid``, ``aliases`` (first alias),
    ``accession`` or ``@id``.  Existing objects are patched, automatically
    when ``patchall`` is true and otherwise after an interactive prompt.
    Unknown objects are POSTed to ``sheet`` only when ``update`` is true.
    """
    rows = reader(datafile, sheetname=sheet)
    headers = next(rows)  # the first row holds the column names
    tally = {"total": 0, "error": 0, "success": 0, "patch": 0}

    for cells in rows:
        tally["total"] += 1
        post_json = dict_patcher(dict(zip(headers, cells)))

        # Replace an attachment reference with the attachment object itself.
        if post_json.get("attachment"):
            post_json["attachment"] = attachment(post_json["attachment"])
        print(post_json)

        # Look the object up by the first identifier that is present.
        existing = {}
        for field in ("uuid", "aliases", "accession", "@id"):
            if post_json.get(field):
                lookup = post_json[field]
                if field == "aliases":
                    lookup = quote(lookup[0])
                existing = encodedcc.get_ENCODE(lookup, connection)
                break

        if existing.get("uuid"):
            if patchall:
                confirmed = True
            else:
                print("Object {} already exists. Would you like to patch it instead?".format(
                    existing["uuid"]))
                confirmed = input("PATCH? y/n ").lower() == "y"
            if confirmed:
                result = encodedcc.patch_ENCODE(existing["uuid"], connection, post_json)
                if result["status"] == "error":
                    tally["error"] += 1
                elif result["status"] == "success":
                    tally["success"] += 1
                    tally["patch"] += 1
        elif update:
            print("POSTing data!")
            result = encodedcc.new_ENCODE(connection, sheet, post_json)
            if result["status"] == "error":
                tally["error"] += 1
            elif result["status"] == "success":
                tally["success"] += 1

    print("{sheet}: {success} out of {total} posted, {error} errors, {patch} patched".format(
        sheet=sheet.upper(), **tally))
def excel_reader(datafile, sheet, update, connection, patchall):
    """POST or PATCH every data row of one sheet, then report what happened.

    The first row from ``reader`` supplies the property names.  Each later
    row is converted with ``dict_patcher`` and ``expose_objects`` (using the
    ``properties`` of the sheet's profile) and looked up on the server by the
    first non-empty of ``uuid``, ``aliases`` (first alias), ``accession`` or
    ``@id``.  Existing objects are patched, automatically when ``patchall``
    is true and otherwise after an interactive prompt.  Unknown objects are
    POSTed to ``sheet`` only when ``update`` is true.

    After the tally line, the accession/UUID and aliases of every newly
    created object are printed, followed by the aliases of failed POSTs.
    """
    rows = reader(datafile, sheetname=sheet)
    keys = next(rows)  # grab the first row of headers
    total = 0
    error = 0
    success = 0
    patch = 0
    json_properties = encodedcc.get_ENCODE(
        '/profiles/{}.json'.format(sheet), connection)['properties']
    new_accessions_aliases = []
    failed_postings = []

    for values in rows:
        total += 1
        post_json = dict(zip(keys, values))
        post_json = dict_patcher(post_json)
        post_json = expose_objects(post_json, json_properties)
        # add attachments here
        if post_json.get("attachment"):
            post_json["attachment"] = attachment(post_json["attachment"])
        print(post_json)

        temp = {}
        # Silence get_ENCODE failures.
        with encodedcc.print_muted():
            if post_json.get("uuid"):
                temp = encodedcc.get_ENCODE(post_json["uuid"], connection)
            elif post_json.get("aliases"):
                temp = encodedcc.get_ENCODE(quote(post_json["aliases"][0]), connection)
            elif post_json.get("accession"):
                temp = encodedcc.get_ENCODE(post_json["accession"], connection)
            elif post_json.get("@id"):
                temp = encodedcc.get_ENCODE(post_json["@id"], connection)

        if temp.get("uuid"):
            do_patch = patchall
            if not do_patch:
                print(
                    "Object {} already exists. Would you like to patch it instead?"
                    .format(temp["uuid"]))
                do_patch = input("PATCH? y/n ").lower() == "y"
            if do_patch:
                e = encodedcc.patch_ENCODE(temp["uuid"], connection, post_json)
                if e["status"] == "error":
                    error += 1
                elif e["status"] == "success":
                    success += 1
                    patch += 1
        elif update:
            print("POSTing data!")
            e = encodedcc.new_ENCODE(connection, sheet, post_json)
            if e["status"] == "error":
                error += 1
                failed_postings.append(
                    post_json.get('aliases', 'alias not specified'))
            elif e["status"] == "success":
                new_object = e['@graph'][0]
                new_id = new_object.get('accession', new_object.get('uuid'))
                # Print now and later.
                print('New accession/UUID: {}'.format(new_id))
                new_accessions_aliases.append((new_id, new_object.get('aliases')))
                success += 1

    print(
        "{sheet}: {success} out of {total} posted, {error} errors, {patch} patched"
        .format(sheet=sheet.upper(), success=success, total=total,
                error=error, patch=patch))

    if new_accessions_aliases:
        print('New accession/UUID and alias:' if len(new_accessions_aliases) == 1
              else 'New accessions/UUIDs and aliases:')
        for accession, alias in new_accessions_aliases:
            # ``alias`` is None when the new object has no "aliases" key;
            # a truthiness test covers None as well as empty containers.
            if not alias:
                alias = 'alias not specified'
            elif isinstance(alias, list):
                alias = ', '.join(alias)
            print(accession, alias)

    if failed_postings:
        print('Posting failed for {} object(s):'.format(len(failed_postings)))
        for alias in failed_postings:
            print(', '.join(alias) if isinstance(alias, list) else alias)
def excel_reader(datafile, sheet, update, connection, patchall):
    """Submit the rows of one spreadsheet sheet to the server and print totals.

    Row one of the sheet names the properties; every following row is turned
    into a JSON object with ``dict_patcher``.  An object already on the server
    (found by ``uuid``, first alias, ``accession`` or ``@id``, in that order)
    is PATCHed, without asking when ``patchall`` is true and otherwise only
    after the user answers ``y``.  Rows with no server match are POSTed to
    ``sheet`` when ``update`` is true and skipped otherwise.
    """
    table = reader(datafile, sheetname=sheet)
    keys = next(table)  # header row
    total = error = success = patch = 0

    for values in table:
        total += 1
        post_json = dict_patcher(dict(zip(keys, values)))

        # Swap the attachment reference for the attachment object.
        attachment_ref = post_json.get("attachment")
        if attachment_ref:
            post_json["attachment"] = attachment(attachment_ref)
        print(post_json)

        if post_json.get("uuid"):
            found = encodedcc.get_ENCODE(post_json["uuid"], connection)
        elif post_json.get("aliases"):
            first_alias = post_json["aliases"][0]
            found = encodedcc.get_ENCODE(quote(first_alias), connection)
        elif post_json.get("accession"):
            found = encodedcc.get_ENCODE(post_json["accession"], connection)
        elif post_json.get("@id"):
            found = encodedcc.get_ENCODE(post_json["@id"], connection)
        else:
            found = {}

        if not found.get("uuid"):
            # Nothing on the server yet: create it when allowed to.
            if update:
                print("POSTing data!")
                response = encodedcc.new_ENCODE(connection, sheet, post_json)
                if response["status"] == "error":
                    error += 1
                elif response["status"] == "success":
                    success += 1
            continue

        if not patchall:
            print(
                "Object {} already exists. Would you like to patch it instead?"
                .format(found["uuid"]))
            answer = input("PATCH? y/n ")
            if answer.lower() != "y":
                continue

        response = encodedcc.patch_ENCODE(found["uuid"], connection, post_json)
        if response["status"] == "error":
            error += 1
        elif response["status"] == "success":
            success += 1
            patch += 1

    summary = "{sheet}: {success} out of {total} posted, {error} errors, {patch} patched"
    print(summary.format(sheet=sheet.upper(), success=success, total=total,
                         error=error, patch=patch))
def main():
    """Read one JSON object and GET, PATCH, PUT or POST it on the server.

    The object comes from ``--infile`` or stdin, or, with ``--id``, is only
    fetched and printed.  Existing server objects are looked up by ``@id``,
    ``uuid`` and ``accession``.  Depending on what is found and on
    ``--force-put`` the object is replaced, patched, PUT or POSTed.  Write
    requests are only sent when ``--update`` is given; otherwise the run
    only prints what it would do.

    Exits with status 1 when the object's type is missing or unsupported,
    when an attachment names no file, or when a write has no usable target.
    """
    import argparse

    default_keyfile = os.path.expanduser("~/keypairs.json")
    parser = argparse.ArgumentParser(
        description=__doc__,
        epilog=EPILOG,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument('--infile', '-i',
                        help="File containing the JSON object as a JSON string.")
    parser.add_argument('--server', help="Full URL of the server.")
    parser.add_argument('--key', default='default',
                        help="The keypair identifier from the keyfile. "
                             "Default is --key=default")
    parser.add_argument('--keyfile', default=default_keyfile,
                        help="The keypair file. Default is --keyfile=%s" % default_keyfile)
    parser.add_argument('--authid', help="The HTTP auth ID.")
    parser.add_argument('--authpw', help="The HTTP auth PW.")
    parser.add_argument('--force-put', default=False, action='store_true',
                        help="Force the object to be PUT rather than PATCHed. "
                             "Default is False.")
    parser.add_argument('--get-only', default=False, action='store_true',
                        help="Do nothing but get the object and print it. "
                             "Default is False.")
    parser.add_argument('--id', help="URI for an object")
    parser.add_argument('--debug', default=False, action='store_true',
                        help="Print debug messages. Default is False.")
    parser.add_argument('--frame',
                        help="define a frame to get back the JSON object, for use with --id. "
                             "Default is frame=object",
                        default="object")
    parser.add_argument('--type', help="the object's type")
    parser.add_argument('--update', default=False, action='store_true',
                        help="Let the script PATCH/POST the data. Default is False")
    args = parser.parse_args()

    global DEBUG_ON
    DEBUG_ON = args.debug
    GET_ONLY = bool(args.get_only)

    key = encodedcc.ENC_Key(args.keyfile, args.key)
    if args.server and args.authpw and args.authid:
        key.server = args.server
        key.authid = args.authid
        key.authpw = args.authpw
        print("Creating authorization data from command line inputs")
    connection = encodedcc.ENC_Connection(key)
    print("Running on {}".format(connection.server))
    if args.update:
        print("This is an UPDATE run! Data will be PATCHed or POSTed accordingly")
    else:
        print("This is a dry run, no data will be changed")

    new_object = False
    if args.id:
        # --id means "fetch and show only"; nothing is ever written.
        GET_ONLY = True
        print("Taking id to get from --id")
        new_json = {}
        uuid_response = {}
        accession_response = {}
        try:
            id_response = encodedcc.get_ENCODE(args.id, connection, frame=args.frame)
        except Exception:
            id_response = {}
            new_object = True
    else:
        if args.infile:
            with open(args.infile, 'r') as infile:
                new_json_string = infile.read()
        else:
            new_json_string = sys.stdin.read()
        new_json = json.loads(new_json_string)
        if args.debug:
            encodedcc.pprint_ENCODE(new_json)

        # Look the object up under each identifier it carries.  A missing
        # identifier, or a 404 for a present one, marks the object as new.
        responses = {}
        for field in ('@id', 'uuid', 'accession'):
            response = {}
            if field in new_json:
                response = encodedcc.get_ENCODE(new_json[field], connection)
                if response.get("code") == 404:
                    response = {}
                    new_object = True
            else:
                new_object = True
            responses[field] = response
        id_response = responses['@id']
        uuid_response = responses['uuid']
        accession_response = responses['accession']
        if new_object:
            print("No identifier in new JSON object. Assuming POST or PUT with auto-accessioning.")

    object_exists = False
    if id_response:
        object_exists = True
        print("Found matching @id:")
        encodedcc.pprint_ENCODE(id_response)
    if uuid_response:
        object_exists = True
        print("Found matching uuid:")
        encodedcc.pprint_ENCODE(uuid_response)
    if accession_response:
        object_exists = True
        print("Found matching accession")
        encodedcc.pprint_ENCODE(accession_response)

    if id_response and uuid_response and (id_response != uuid_response):
        print("Existing id/uuid mismatch")
    if id_response and accession_response and (id_response != accession_response):
        print("Existing id/accession mismatch")
    if uuid_response and accession_response and (uuid_response != accession_response):
        print("Existing uuid/accession mismatch")

    if new_object and object_exists:
        print("Conflict: At least one identifier already exists and at least one does not exist")

    profiles = encodedcc.get_ENCODE("/profiles/", connection)
    supported_collections = list(profiles.keys())
    if "Dataset" not in supported_collections:
        supported_collections.append("Dataset")

    type_list = new_json.pop('@type', [])
    if args.type:
        type_list = [args.type]
    if not any(type_list):
        print("No type found for JSON object!")
        sys.exit(1)

    # Case-insensitive match against the supported collections; the last
    # match wins, as in the original scan over the whole list.
    wanted = type_list[0].lower()
    matches = [name for name in supported_collections if name.lower() == wanted]
    if not matches:
        print("Error! JSON object does not contain one of the supported types")
        print("Provided type:", type_list[0])
        print("Please either change the JSON file or define the type with the --type feature")
        sys.exit(1)
    type_list = [matches[-1]]
    if args.debug:
        print("Object will have type of", type_list[0])
    collection = type_list[0]

    # Target path for PUT/PATCH; stays None when the JSON has no identifier.
    identifier = None
    if '@id' in new_json:
        identifier = new_json.pop('@id')
    elif 'uuid' in new_json:
        identifier = '/' + collection + '/' + new_json['uuid'] + '/'
    elif 'accession' in new_json:
        identifier = '/' + collection + '/' + new_json['accession'] + '/'

    if 'attachment' in new_json and 'href' not in new_json['attachment']:
        attachment_spec = new_json['attachment']
        try:
            filename = attachment_spec['download']
        except (KeyError, TypeError):
            # Without a file name nothing below can work; stop cleanly
            # instead of failing later on an unbound name.
            print("Must specify either href or filename for attachment", file=sys.stderr)
            sys.exit(1)
        print("Setting filename to %s" % (filename))

        mime_type = attachment_spec.get('type')
        if not mime_type:
            mime_type, _encoding = mimetypes.guess_type(filename)
            if mime_type is None:
                print("Failed to detect mime type in file %s" % (filename), file=sys.stderr)
            else:
                print("Detected mime type %s" % (mime_type))

        try:
            with open(filename, 'rb') as stream:
                print("opened")
                # b64encode returns bytes; decode so the data URI holds the
                # plain base64 text rather than a "b'...'" repr.
                encoded = b64encode(stream.read()).decode('ascii')
            newvalue = {
                'download': filename,  # Just echoes the given filename as the download name
                'type': mime_type,
                'href': 'data:%s;base64,%s' % (mime_type, encoded),
            }
            # Dump the attachment to ./tmp and close the handle.
            with open('tmp', 'w') as f:
                print(newvalue, file=f)
            new_json.update({'attachment': newvalue})
        except OSError:
            print("Cannot open file %s" % (filename), file=sys.stderr)

    if GET_ONLY:
        return

    if object_exists:
        if args.force_put:
            print("Replacing existing object")
            if args.update:
                e = encodedcc.replace_ENCODE(identifier, connection, new_json)
                print(e)
        else:
            print("PATCHing existing object")
            if args.update:
                e = encodedcc.patch_ENCODE(identifier, connection, new_json)
                print(e)
    elif new_object:
        if args.force_put:
            print("PUT'ing new object")
            if args.update:
                if identifier is None:
                    print("ERROR: Unable to PUT an object without @id, uuid or accession",
                          file=sys.stderr)
                    sys.exit(1)
                e = encodedcc.replace_ENCODE(identifier, connection, new_json)
                print(e)
        else:
            print("POST'ing new object")
            if not collection:
                print("ERROR: Unable to POST to non-existing collection {}".format(collection))
                sys.exit(1)
            if args.update:
                e = encodedcc.new_ENCODE(connection, collection, new_json)
                print(e)
def main():
    """Read one JSON object and GET, PATCH, PUT or POST it on the server.

    The object comes from ``--infile`` or stdin, or, with ``--id``, is only
    fetched and printed.  Existing server objects are looked up by ``@id``,
    ``uuid`` and ``accession``.  Depending on what is found and on
    ``--force-put`` the object is replaced, patched, PUT or POSTed.  Unless
    ``--get-only`` or ``--id`` is given, the request is always sent.

    Exits with status 1 when the object's type is missing or unsupported,
    when an attachment names no file, or when a write has no usable target.
    """
    import argparse

    default_keyfile = os.path.expanduser("~/keypairs.json")
    parser = argparse.ArgumentParser(
        description=__doc__,
        epilog=EPILOG,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument('--infile', '-i',
                        help="File containing the JSON object as a JSON string.")
    parser.add_argument('--server', help="Full URL of the server.")
    parser.add_argument('--key', default='default',
                        help="The keypair identifier from the keyfile. "
                             "Default is --key=default")
    parser.add_argument('--keyfile', default=default_keyfile,
                        help="The keypair file. Default is --keyfile=%s" % default_keyfile)
    parser.add_argument('--authid', help="The HTTP auth ID.")
    parser.add_argument('--authpw', help="The HTTP auth PW.")
    parser.add_argument('--force-put', default=False, action='store_true',
                        help="Force the object to be PUT rather than PATCHed. "
                             "Default is False.")
    parser.add_argument('--get-only', default=False, action='store_true',
                        help="Do nothing but get the object and print it. "
                             "Default is False.")
    parser.add_argument('--id', help="URI for an object")
    parser.add_argument('--debug', default=False, action='store_true',
                        help="Print debug messages. Default is False.")
    parser.add_argument('--frame',
                        help="define a frame to get back the JSON object, for use with --id. "
                             "Default is frame=object",
                        default="object")
    parser.add_argument('--type', help="the object's type")
    args = parser.parse_args()

    global DEBUG_ON
    DEBUG_ON = args.debug
    GET_ONLY = bool(args.get_only)

    key = encodedcc.ENC_Key(args.keyfile, args.key)
    connection = encodedcc.ENC_Connection(key)

    new_object = False
    id_response = {}
    uuid_response = {}
    accession_response = {}
    if args.id:
        # --id means "fetch and show only"; nothing is ever written.
        GET_ONLY = True
        print("Taking id to get from --id")
        new_json = {}
        try:
            id_response = encodedcc.get_ENCODE(args.id, connection, frame=args.frame)
        except Exception:
            id_response = {}
            new_object = True
    else:
        if args.infile:
            with open(args.infile, 'r') as infile:
                new_json_string = infile.read()
        else:
            new_json_string = sys.stdin.read()
        new_json = json.loads(new_json_string)

        # A failed lookup of a present identifier marks the object as new.
        if '@id' in new_json:
            try:
                id_response = encodedcc.get_ENCODE(new_json['@id'], connection)
            except Exception:
                id_response = {}
                new_object = True
        if 'uuid' in new_json:
            try:
                uuid_response = encodedcc.get_ENCODE(new_json['uuid'], connection)
            except Exception:
                uuid_response = {}
                new_object = True
        if 'accession' in new_json:
            try:
                accession_response = encodedcc.get_ENCODE(new_json['accession'], connection)
            except Exception:
                accession_response = {}
                new_object = True
        else:
            # Only a missing accession (not a missing @id or uuid) marks the
            # object as new in this version.
            print("No identifier in new JSON object. Assuming POST or PUT with auto-accessioning.")
            new_object = True

    object_exists = False
    if id_response:
        object_exists = True
        print("Found matching @id:")
        encodedcc.pprint_ENCODE(id_response)
    if uuid_response:
        object_exists = True
        print("Found matching uuid:")
        encodedcc.pprint_ENCODE(uuid_response)
    if accession_response:
        object_exists = True
        print("Found matching accession")
        encodedcc.pprint_ENCODE(accession_response)

    if id_response and uuid_response and (id_response != uuid_response):
        print("Existing id/uuid mismatch")
    if id_response and accession_response and (id_response != accession_response):
        print("Existing id/accession mismatch")
    if uuid_response and accession_response and (uuid_response != accession_response):
        print("Existing uuid/accession mismatch")

    if new_object and object_exists:
        print("Conflict: At least one identifier already exists and at least one does not exist")

    profiles = encodedcc.get_ENCODE("/profiles/", connection)
    supported_collections = list(profiles.keys())
    if "Dataset" not in supported_collections:
        supported_collections.append("Dataset")

    type_list = new_json.pop('@type', [])
    if args.type:
        type_list = [args.type]
    if not any(type_list):
        print("No type found for JSON object!")
        sys.exit(1)

    # Case-insensitive match against the supported collections; the last
    # match wins, as in the original scan over the whole list.
    wanted = type_list[0].lower()
    matches = [name for name in supported_collections if name.lower() == wanted]
    if not matches:
        print("Error! JSON object does not contain one of the supported types")
        print("Provided type:", type_list[0])
        print("Please either change the JSON file or define the type with the --type feature")
        sys.exit(1)
    type_list = [matches[-1]]
    if args.debug:
        print("Object will have type of", type_list[0])
    collection = type_list[0]

    # Target path for PUT/PATCH; stays None when the JSON has no identifier.
    identifier = None
    if '@id' in new_json:
        identifier = new_json.pop('@id')
    elif 'uuid' in new_json:
        identifier = '/' + collection + '/' + new_json['uuid'] + '/'
    elif 'accession' in new_json:
        identifier = '/' + collection + '/' + new_json['accession'] + '/'

    if 'attachment' in new_json and 'href' not in new_json['attachment']:
        attachment_spec = new_json['attachment']
        try:
            filename = attachment_spec['download']
        except (KeyError, TypeError):
            # Without a file name nothing below can work; stop cleanly
            # instead of failing later on an unbound name.
            print("Must specify either href or filename for attachment", file=sys.stderr)
            sys.exit(1)
        print("Setting filename to %s" % (filename))

        mime_type = attachment_spec.get('type')
        if not mime_type:
            mime_type, _encoding = mimetypes.guess_type(filename)
            if mime_type is None:
                print("Failed to detect mime type in file %s" % (filename), file=sys.stderr)
            else:
                print("Detected mime type %s" % (mime_type))

        try:
            with open(filename, 'rb') as stream:
                print("opened")
                # b64encode returns bytes; decode so the data URI holds the
                # plain base64 text rather than a "b'...'" repr.
                encoded = b64encode(stream.read()).decode('ascii')
            newvalue = {
                'download': filename,  # Just echoes the given filename as the download name
                'type': mime_type,
                'href': 'data:%s;base64,%s' % (mime_type, encoded),
            }
            # Dump the attachment to ./tmp and close the handle.
            with open('tmp', 'w') as f:
                print(newvalue, file=f)
            new_json.update({'attachment': newvalue})
        except OSError:
            print("Cannot open file %s" % (filename), file=sys.stderr)

    if GET_ONLY:
        return

    if object_exists:
        if args.force_put:
            print("Replacing existing object")
            e = encodedcc.replace_ENCODE(identifier, connection, new_json)
        else:
            print("Patching existing object")
            e = encodedcc.patch_ENCODE(identifier, connection, new_json)
        print(e)
    elif new_object:
        if args.force_put:
            print("PUT'ing new object")
            if identifier is None:
                print("ERROR: Unable to PUT an object without @id, uuid or accession",
                      file=sys.stderr)
                sys.exit(1)
            e = encodedcc.replace_ENCODE(identifier, connection, new_json)
        else:
            print("POST'ing new object")
            if not collection:
                print("ERROR: Unable to POST to non-existing collection {}".format(collection))
                sys.exit(1)
            e = encodedcc.new_ENCODE(connection, collection, new_json)
        print(e)