Python new_ENCODE 예제들, encodedcc.new_ENCODE Python 예제들

예제 #1

0

파일 보기

파일: ENCODE_publications.py 프로젝트: resurgo-genetics/pyencoded-tools

 def check_ENCODE(self, idList, connection, otherIdList=None, bothDicts=None):
     """Reconcile a list of PMIDs with the publications stored in ENCODE.

     For every PMID the ENCODE publication search is queried:

     * If the search returns results, the first hit's uuid is compared
       against Entrez via ``compare_entrez_ENCODE`` (skipped when
       ``self.CREATE_ONLY`` is set).
     * Otherwise every entry of ``otherIdList`` is searched and the
       response's ``title`` is compared with the Entrez title; on a match
       the record is compared and, when ``self.UPDATE`` is set, PATCHed so
       that its ``identifiers`` include the PMID.
     * If nothing matched and ``self.CREATE`` is set, a new publication is
       POSTed and ``self.POST_COUNT`` is incremented.

     Args:
         idList: iterable of PMID strings.
         connection: connection object handed to the ``encodedcc`` helpers.
         otherIdList: optional alternative identifiers to search for.
         bothDicts: optional mapping of PMID to extra fields that do not
             come from Entrez (``categories``, ``published_by``,
             ``data_used``).
     """
     # None defaults avoid sharing one mutable list/dict between calls.
     otherIdList = [] if otherIdList is None else otherIdList
     bothDicts = {} if bothDicts is None else bothDicts

     for pmid in idList:
         extraData = bothDicts.get(pmid)
         # A PMID missing from bothDicts must not crash the POST below;
         # extraData itself is still passed on unchanged to the comparer.
         extraFields = extraData or {}

         ENCODEvalue = encodedcc.get_ENCODE(
             "/search/?type=publication&searchTerm=PMID:" + pmid, connection)
         hits = ENCODEvalue.get("@graph")
         if hits:
             logger.info("PMID %s is listed in ENCODE", pmid)
             if not self.CREATE_ONLY:
                 self.compare_entrez_ENCODE(
                     hits[0].get("uuid"), pmid, connection, extraData)
             continue

         if self.CREATE_ONLY:
             self.get_entrez([pmid])
         titleEntrez = self.entrezDict[pmid].get("title")

         found = False
         for otherID in otherIdList:
             # NOTE(review): title/uuid/identifiers are read from the top
             # level of the search response - confirm that is the intended
             # shape of this endpoint's result.
             titleENCODE = encodedcc.get_ENCODE(
                 "/search/?type=publication&searchTerm=" + otherID, connection)
             if titleENCODE.get("title") != titleEntrez:
                 continue
             matchUuid = titleENCODE.get("uuid")
             # Lazy %-formatting also tolerates a missing uuid in the log.
             logger.warning(
                 "%s is in ENCODE by a different name %s", pmid, matchUuid)
             self.compare_entrez_ENCODE(matchUuid, pmid, connection, extraData)
             if self.UPDATE:
                 # Copy so that a missing "identifiers" key cannot raise
                 # and the fetched record is not mutated in place.
                 newIdent = list(titleENCODE.get("identifiers") or [])
                 newIdent.append("PMID:" + pmid)
                 encodedcc.patch_ENCODE(
                     matchUuid, connection, {"identifiers": newIdent})
             found = True
         if found:
             continue

         logger.warning("This publication is not listed in ENCODE %s", pmid)
         if not self.CREATE:
             continue

         self.POST_COUNT += 1
         pmidData = self.entrezDict[pmid]
         logger.info("POSTing the new object: %s", pmid)
         post_dict = {
             "title": pmidData.get("title"),
             "abstract": pmidData.get("abstract"),
             "submitted_by": "/users/8b1f8780-b5d6-4fb7-a5a2-ddcec9054288/",
             "lab": "/labs/encode-consortium/",
             "award": "/awards/ENCODE/",
             "categories": extraFields.get("categories"),
             "published_by": extraFields.get("published_by"),
             "date_published": pmidData.get("date_published"),
             "authors": pmidData.get("authors"),
             "identifiers": ["PMID:" + pmid],
             "journal": pmidData.get("journal"),
             "volume": pmidData.get("volume"),
             "issue": pmidData.get("issue"),
             "page": pmidData.get("page"),
             "status": "published",
         }
         # data_used is only sent when a value was actually supplied.
         if extraFields.get("data_used"):
             post_dict["data_used"] = extraFields.get("data_used")
         encodedcc.new_ENCODE(connection, "publications", post_dict)

예제 #2

0

파일 보기

파일: ENCODE_publications.py 프로젝트: jessie-wangjie/pyencoded-tools

 def check_ENCODE(self, idList, connection, otherIdList=None, bothDicts=None):
     """Check each PMID against ENCODE and compare, patch or create it.

     Per PMID:

     1. Search ENCODE publications for ``PMID:<pmid>``.  When results come
        back, compare the first hit with Entrez through
        ``compare_entrez_ENCODE`` unless ``self.CREATE_ONLY`` is set.
     2. Otherwise search for every identifier in ``otherIdList`` and compare
        the response's ``title`` with the Entrez title.  A match is compared
        and, when ``self.UPDATE`` is set, PATCHed so its ``identifiers``
        gain ``PMID:<pmid>``.
     3. When nothing matched and ``self.CREATE`` is set, POST a new
        publication and increment ``self.POST_COUNT``.

     Args:
         idList: iterable of PMID strings.
         connection: connection object passed to the ``encodedcc`` helpers.
         otherIdList: optional alternative identifiers to search for.
         bothDicts: optional mapping of PMID to a dict of extra fields
             (``categories``, ``published_by``, ``data_used``).
     """
     # Mutable defaults would be shared between calls; use None sentinels.
     if otherIdList is None:
         otherIdList = []
     if bothDicts is None:
         bothDicts = {}

     for pmid in idList:
         extraData = bothDicts.get(pmid)
         # Used only for building the POST body, so that a PMID without an
         # entry in bothDicts does not raise AttributeError there.
         extraFields = extraData or {}

         ENCODEvalue = encodedcc.get_ENCODE(
             "/search/?type=publication&searchTerm=PMID:" + pmid, connection)
         graph = ENCODEvalue.get("@graph")
         if graph:
             logger.info("PMID %s is listed in ENCODE", pmid)
             uuid = graph[0].get("uuid")
             if not self.CREATE_ONLY:
                 self.compare_entrez_ENCODE(uuid, pmid, connection, extraData)
             continue

         if self.CREATE_ONLY:
             self.get_entrez([pmid])
         titleEntrez = self.entrezDict[pmid].get("title")

         found = False
         for otherID in otherIdList:
             # NOTE(review): the fields below are read from the top level of
             # a search response - confirm this matches the endpoint.
             titleENCODE = encodedcc.get_ENCODE(
                 "/search/?type=publication&searchTerm=" + otherID, connection)
             if titleENCODE.get("title") == titleEntrez:
                 otherUuid = titleENCODE.get("uuid")
                 # %-style arguments keep the warning from raising when the
                 # uuid is missing.
                 logger.warning(
                     "%s is in ENCODE by a different name %s", pmid, otherUuid)
                 self.compare_entrez_ENCODE(
                     otherUuid, pmid, connection, extraData)
                 if self.UPDATE:
                     # Tolerate a record without "identifiers" and avoid
                     # mutating the fetched object.
                     newIdent = list(titleENCODE.get("identifiers") or [])
                     newIdent.append("PMID:" + pmid)
                     patch_dict = {"identifiers": newIdent}
                     encodedcc.patch_ENCODE(otherUuid, connection, patch_dict)
                 found = True

         if not found:
             logger.warning("This publication is not listed in ENCODE %s", pmid)
             if self.CREATE:
                 self.POST_COUNT += 1
                 pmidData = self.entrezDict[pmid]
                 logger.info("POSTing the new object: %s", pmid)
                 post_dict = {
                     "title": pmidData.get("title"),
                     "abstract": pmidData.get("abstract"),
                     "submitted_by": "/users/8b1f8780-b5d6-4fb7-a5a2-ddcec9054288/",
                     "lab": "/labs/encode-consortium/",
                     "award": "/awards/ENCODE/",
                     "categories": extraFields.get("categories"),
                     "published_by": extraFields.get("published_by"),
                     "date_published": pmidData.get("date_published"),
                     "authors": pmidData.get("authors"),
                     "identifiers": ["PMID:" + pmid],
                     "journal": pmidData.get("journal"),
                     "volume": pmidData.get("volume"),
                     "issue": pmidData.get("issue"),
                     "page": pmidData.get("page"),
                     "status": "published",
                 }
                 # Only include data_used when a value was provided.
                 data_used = extraFields.get("data_used")
                 if data_used:
                     post_dict["data_used"] = data_used
                 encodedcc.new_ENCODE(connection, "publications", post_dict)

예제 #3

0

파일 보기

파일: 3777-gravely-protocol.py 프로젝트: resurgo-genetics/pyencoded-tools

def file_manager(key, value, connection, obj_type):
    """Download a protocol file, upload it as a Document and attach it.

    The file at ``key`` is saved in the working directory, wrapped with
    ``attachment()`` and POSTed as a new ``Document`` whose alias is built
    from the last two path segments of ``key``.  If an object with that
    alias already exists it is first PATCHed to ``deleted`` with its aliases
    cleared.  Finally ``value`` is PATCHed to reference the new document and
    the local file is removed.

    Args:
        key: URL of the file to download.
        value: identifier of the object that should reference the document.
        connection: connection object passed to the ``encodedcc`` helpers.
        obj_type: ``"Biosample"`` attaches through ``protocol_documents``;
            anything else uses ``documents``.  It also decides which
            hyphen-separated part of the filename goes in the description.

    Raises:
        requests.HTTPError: if the download answers with an error status.
    """
    import os  # local import: only needed for the temp-file cleanup

    filename = key.split("/")[-1]
    print("Downloading {}".format(filename))
    r = requests.get(key)
    # Fail early instead of uploading an HTTP error page as a protocol.
    r.raise_for_status()
    with open(filename, "wb") as outfile:
        outfile.write(r.content)

    # try/finally guarantees the downloaded file is removed even when one of
    # the ENCODE calls below raises.
    try:
        part_index = 0 if obj_type == "Biosample" else 1
        filepart = filename.split("-")[part_index]

        attach = attachment(filename)
        temp = "_".join(key.split("/")[-2:])
        aliases = ["brenton-graveley:" + temp]
        alias_path = quote(aliases[0])

        # Retire any earlier document that already holds this alias so the
        # POST below does not collide with it.
        if encodedcc.get_ENCODE(alias_path, connection)['status'] != 'error':
            removing_patch = {'status': 'deleted', 'aliases': []}
            print('DELETING ' + aliases[0] + ' ' + str(removing_patch))
            encodedcc.patch_ENCODE(alias_path, connection, removing_patch)

        upload = {
            "aliases": aliases,
            "attachment": attach,
            "award": "U54HG007005",
            "document_type": "general protocol",
            "lab": "/labs/brenton-graveley/",
            "status": "released",
            "description":
            "{obj_type} protocol for {filepart} shRNA followed by RNA-seq".format(
                obj_type=obj_type, filepart=filepart),
        }

        print("Uploading {} as {}".format(filename, aliases[0]))
        encodedcc.new_ENCODE(connection, "Document", upload)

        print("Patching {} with document {}".format(value, aliases[0]))
        if obj_type == "Biosample":
            docs = {"protocol_documents": aliases}
        else:
            docs = {"documents": aliases}
        encodedcc.patch_ENCODE(quote(value), connection, docs)
    finally:
        print("Removing document {}".format(filename))
        # os.remove is portable, unlike spawning an external `rm` process.
        os.remove(filename)
    '''

예제 #4

0

파일 보기

파일: 3777-gravely-protocol.py 프로젝트: ENCODE-DCC/pyencoded-tools

def file_manager(key, value, connection, obj_type):
    """Fetch a protocol file and register it in ENCODE as a Document.

    Workflow:

    1. Download ``key`` into the working directory, named after the last
       path segment of the URL.
    2. Build the alias ``brenton-graveley:<last two URL segments joined by
       "_">``; if an object with that alias is already retrievable, PATCH it
       to ``deleted`` with no aliases.
    3. POST a new ``Document`` carrying the file as its attachment.
    4. PATCH ``value`` so that it lists the new document
       (``protocol_documents`` for a Biosample, ``documents`` otherwise).
    5. Remove the downloaded file.

    Args:
        key: URL of the file to download.
        value: identifier of the object to PATCH with the document alias.
        connection: connection object passed to the ``encodedcc`` helpers.
        obj_type: object type name; ``"Biosample"`` is special-cased.

    Raises:
        requests.HTTPError: if the download answers with an error status.
    """
    import os  # local import: only needed for the temp-file cleanup

    filename = key.split("/")[-1]
    print("Downloading {}".format(filename))
    r = requests.get(key)
    # Do not store (and later upload) the body of a failed HTTP response.
    r.raise_for_status()
    with open(filename, "wb") as outfile:
        outfile.write(r.content)

    try:
        if obj_type == "Biosample":
            filepart = filename.split("-")[0]
        else:
            filepart = filename.split("-")[1]

        attach = attachment(filename)
        temp = "_".join(key.split("/")[-2:])
        aliases = ["brenton-graveley:" + temp]

        # An existing holder of the alias is retired before re-uploading.
        if (encodedcc.get_ENCODE(quote(aliases[0]), connection)['status']) != 'error':
            removing_patch = {'status': 'deleted',
                              'aliases': []}
            print('DELETING ' + aliases[0] + ' ' + str(removing_patch))
            encodedcc.patch_ENCODE(quote(aliases[0]), connection, removing_patch)

        upload = {"aliases": aliases,
                  "attachment": attach,
                  "award": "U54HG007005",
                  "document_type": "general protocol",
                  "lab": "/labs/brenton-graveley/",
                  "status": "released",
                  "description": "{obj_type} protocol for {filepart} shRNA followed by RNA-seq".format(obj_type=obj_type, filepart=filepart),
                  }

        print("Uploading {} as {}".format(filename, aliases[0]))
        encodedcc.new_ENCODE(connection, "Document", upload)

        print("Patching {} with document {}".format(value, aliases[0]))
        if obj_type == "Biosample":
            docs = {"protocol_documents": aliases}
        else:
            docs = {"documents": aliases}

        encodedcc.patch_ENCODE(quote(value), connection, docs)
    finally:
        # Runs on success and on failure, so the download is never left
        # behind; os.remove avoids depending on an external `rm` binary.
        print("Removing document {}".format(filename))
        os.remove(filename)
    
    '''

예제 #5

0

파일 보기

파일: ENCODE_import_data.py 프로젝트: ENCODE-DCC/pyencoded-tools

def excel_reader(datafile, sheet, update, connection, patchall):
    """Load one worksheet and PATCH or POST each of its data rows to ENCODE.

    The first row supplies the property names; every later row becomes one
    JSON object.  A row whose uuid, first alias, accession or @id resolves
    to an object with a uuid is PATCHed (automatically when ``patchall`` is
    true, otherwise only after an interactive y/n prompt).  Any other row is
    POSTed to the ``sheet`` collection when ``update`` is true.  A one-line
    summary of the counters is printed at the end.
    """
    rows = reader(datafile, sheetname=sheet)
    header = next(rows)  # first row holds the column headers
    tally = {"total": 0, "error": 0, "success": 0, "patch": 0}

    for cells in rows:
        tally["total"] += 1
        post_json = dict_patcher(dict(zip(header, cells)))
        # Swap the attachment value for whatever attachment() returns.
        if post_json.get("attachment"):
            post_json["attachment"] = attachment(post_json["attachment"])
        print(post_json)

        # Look the object up by the first identifier the row provides.
        existing = {}
        for field in ("uuid", "aliases", "accession", "@id"):
            if post_json.get(field):
                ident = post_json[field]
                if field == "aliases":
                    ident = quote(ident[0])
                existing = encodedcc.get_ENCODE(ident, connection)
                break

        if existing.get("uuid"):
            if not patchall:
                print("Object {} already exists.  Would you like to patch it instead?".format(existing["uuid"]))
                if input("PATCH? y/n ").lower() != "y":
                    continue
            response = encodedcc.patch_ENCODE(existing["uuid"], connection, post_json)
            patched = True
        elif update:
            print("POSTing data!")
            response = encodedcc.new_ENCODE(connection, sheet, post_json)
            patched = False
        else:
            continue

        # Any status other than "error"/"success" is left uncounted.
        if response["status"] == "error":
            tally["error"] += 1
        elif response["status"] == "success":
            tally["success"] += 1
            if patched:
                tally["patch"] += 1

    print("{sheet}: {success} out of {total} posted, {error} errors, {patch} patched".format(
        sheet=sheet.upper(), **tally))

예제 #6

0

파일 보기

파일: ENCODE_import_data.py 프로젝트: mmmika/pyencoded-tools

def excel_reader(datafile, sheet, update, connection, patchall):
    """Load one worksheet, PATCH or POST each row, then print a report.

    The first row supplies the property names.  Each later row is turned
    into a JSON object (through ``dict_patcher`` and ``expose_objects`` with
    the properties of the sheet's profile) and looked up by uuid, first
    alias, accession or @id, in that order.

    * An existing object is PATCHed, automatically when ``patchall`` is true
      and otherwise only after an interactive y/n prompt.
    * A row with no existing object is POSTed to the ``sheet`` collection
      when ``update`` is true.

    After the loop a summary line is printed, followed by the new
    accessions/uuids with their aliases and the aliases of failed POSTs.

    Args:
        datafile: workbook handed to ``reader``.
        sheet: worksheet name; also used as profile and collection name.
        update: when true, rows without an existing object are POSTed.
        connection: connection object passed to the ``encodedcc`` helpers.
        patchall: when true, existing objects are PATCHed without prompting.
    """
    row = reader(datafile, sheetname=sheet)
    keys = next(row)  # grab the first row of headers
    total = 0
    error = 0
    success = 0
    patch = 0
    json_properties = encodedcc.get_ENCODE('/profiles/{}.json'.format(sheet),
                                           connection)['properties']
    new_accessions_aliases = []
    failed_postings = []
    for values in row:
        total += 1
        post_json = dict(zip(keys, values))
        post_json = dict_patcher(post_json)
        post_json = expose_objects(post_json, json_properties)
        # add attachments here
        if post_json.get("attachment"):
            attach = attachment(post_json["attachment"])
            post_json["attachment"] = attach
        print(post_json)
        temp = {}
        # Silence get_ENCODE failures.
        with encodedcc.print_muted():
            if post_json.get("uuid"):
                temp = encodedcc.get_ENCODE(post_json["uuid"], connection)
            elif post_json.get("aliases"):
                temp = encodedcc.get_ENCODE(quote(post_json["aliases"][0]),
                                            connection)
            elif post_json.get("accession"):
                temp = encodedcc.get_ENCODE(post_json["accession"], connection)
            elif post_json.get("@id"):
                temp = encodedcc.get_ENCODE(post_json["@id"], connection)
        if temp.get("uuid"):
            if patchall:
                e = encodedcc.patch_ENCODE(temp["uuid"], connection, post_json)
                if e["status"] == "error":
                    error += 1
                elif e["status"] == "success":
                    success += 1
                    patch += 1
            else:
                print(
                    "Object {} already exists.  Would you like to patch it instead?"
                    .format(temp["uuid"]))
                i = input("PATCH? y/n ")
                if i.lower() == "y":
                    e = encodedcc.patch_ENCODE(temp["uuid"], connection,
                                               post_json)
                    if e["status"] == "error":
                        error += 1
                    elif e["status"] == "success":
                        success += 1
                        patch += 1
        else:
            if update:
                print("POSTing data!")
                e = encodedcc.new_ENCODE(connection, sheet, post_json)
                if e["status"] == "error":
                    error += 1
                    failed_postings.append(
                        post_json.get('aliases', 'alias not specified'))
                elif e["status"] == "success":
                    new_object = e['@graph'][0]
                    # Accession when the object has one, otherwise its uuid.
                    new_id = new_object.get('accession',
                                            new_object.get('uuid'))
                    # Print now and later.
                    print('New accession/UUID: {}'.format(new_id))
                    new_accessions_aliases.append(
                        (new_id, new_object.get('aliases')))
                    success += 1
    print(
        "{sheet}: {success} out of {total} posted, {error} errors, {patch} patched"
        .format(sheet=sheet.upper(),
                success=success,
                total=total,
                error=error,
                patch=patch))
    if new_accessions_aliases:
        print('New accession/UUID and alias:' if len(new_accessions_aliases) ==
              1 else 'New accessions/UUIDs and aliases:')
        for (accession, alias) in new_accessions_aliases:
            # `alias` is None when the new object has no "aliases" key, so
            # test truthiness rather than len(), which raises on None.
            if not alias:
                alias = 'alias not specified'
            elif isinstance(alias, list):
                alias = ', '.join(alias)
            print(accession, alias)
    if failed_postings:
        print('Posting failed for {} object(s):'.format(len(failed_postings)))
        for alias in failed_postings:
            print(', '.join(alias) if isinstance(alias, list) else alias)

예제 #7

0

파일 보기

def excel_reader(datafile, sheet, update, connection, patchall):
    """Submit every row of a worksheet to ENCODE as a PATCH or a POST.

    Row one is the header.  For each following row the object is looked up
    by uuid, first alias, accession or @id (first one present wins).  If the
    lookup yields a uuid the object is PATCHed, either unconditionally
    (``patchall``) or after the user answers "y" at a prompt.  Otherwise the
    row is POSTed to ``sheet`` when ``update`` is true.  Counters for
    totals, errors, successes and patches are printed at the end.
    """
    sheet_rows = reader(datafile, sheetname=sheet)
    column_names = next(sheet_rows)  # header row
    counts = dict.fromkeys(("total", "error", "success", "patch"), 0)

    def record(response, is_patch):
        # Fold one server response into the running counters.
        outcome = response["status"]
        if outcome == "error":
            counts["error"] += 1
        elif outcome == "success":
            counts["success"] += 1
            if is_patch:
                counts["patch"] += 1

    for row_values in sheet_rows:
        counts["total"] += 1
        post_json = dict_patcher(dict(zip(column_names, row_values)))
        # Replace the attachment value with the result of attachment().
        if post_json.get("attachment"):
            post_json["attachment"] = attachment(post_json["attachment"])
        print(post_json)

        # Resolve the row to an existing object, if there is one.
        if post_json.get("uuid"):
            found = encodedcc.get_ENCODE(post_json["uuid"], connection)
        elif post_json.get("aliases"):
            first_alias = quote(post_json["aliases"][0])
            found = encodedcc.get_ENCODE(first_alias, connection)
        elif post_json.get("accession"):
            found = encodedcc.get_ENCODE(post_json["accession"], connection)
        elif post_json.get("@id"):
            found = encodedcc.get_ENCODE(post_json["@id"], connection)
        else:
            found = {}

        if not found.get("uuid"):
            # Nothing to patch: POST only when updates are enabled.
            if update:
                print("POSTing data!")
                record(encodedcc.new_ENCODE(connection, sheet, post_json),
                       False)
            continue

        if patchall:
            confirmed = True
        else:
            print(
                "Object {} already exists.  Would you like to patch it instead?"
                .format(found["uuid"]))
            confirmed = input("PATCH? y/n ").lower() == "y"
        if confirmed:
            record(
                encodedcc.patch_ENCODE(found["uuid"], connection, post_json),
                True)

    print(
        "{sheet}: {success} out of {total} posted, {error} errors, {patch} patched"
        .format(sheet=sheet.upper(),
                success=counts["success"],
                total=counts["total"],
                error=counts["error"],
                patch=counts["patch"]))

예제 #8

0

파일 보기

파일: one_ENCODE.py 프로젝트: mmmika/pyencoded-tools

def main():
    """Command-line entry point: GET, PATCH, PUT or POST one ENCODE object.

    The object is identified either by ``--id`` (fetch and print only) or by
    a JSON document read from ``--infile`` / stdin.  For a JSON document the
    ``@id``, ``uuid`` and ``accession`` it contains are each looked up on the
    server; depending on what exists and on ``--force-put`` the object is
    PATCHed, PUT or POSTed.  Nothing is written unless ``--update`` is given.

    An ``attachment`` without an ``href`` is read from the file named in its
    ``download`` field and embedded as a base64 ``data:`` URI.

    Exits with status 1 when the object's type is missing or unsupported,
    when an attachment names neither ``href`` nor ``download``, when a PUT
    is requested without any identifier, or when a POST has no collection.
    """
    import argparse
    parser = argparse.ArgumentParser(
        description=__doc__,
        epilog=EPILOG,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )

    parser.add_argument(
        '--infile',
        '-i',
        help="File containing the JSON object as a JSON string.")
    parser.add_argument('--server', help="Full URL of the server.")
    parser.add_argument('--key',
                        default='default',
                        help="The keypair identifier from the keyfile.  \
                        Default is --key=default")
    parser.add_argument('--keyfile',
                        default=os.path.expanduser("~/keypairs.json"),
                        help="The keypair file.  Default is --keyfile\
                        =%s" % (os.path.expanduser("~/keypairs.json")))
    parser.add_argument('--authid', help="The HTTP auth ID.")
    parser.add_argument('--authpw', help="The HTTP auth PW.")
    parser.add_argument(
        '--force-put',
        default=False,
        action='store_true',
        help="Force the object to be PUT rather than PATCHed.  \
                        Default is False.")
    parser.add_argument('--get-only',
                        default=False,
                        action='store_true',
                        help="Do nothing but get the object and print it.  \
                        Default is False.")
    parser.add_argument('--id', help="URI for an object")
    parser.add_argument('--debug',
                        default=False,
                        action='store_true',
                        help="Print debug messages.  Default is False.")
    parser.add_argument(
        '--frame',
        help=
        "define a frame to get back the JSON object, for use with --id. Default is frame=object",
        default="object")
    parser.add_argument('--type', help="the object's type")
    parser.add_argument(
        '--update',
        default=False,
        action='store_true',
        help="Let the script PATCH/POST the data.  Default is False")
    args = parser.parse_args()

    global DEBUG_ON
    DEBUG_ON = args.debug

    GET_ONLY = args.get_only

    key = encodedcc.ENC_Key(args.keyfile, args.key)
    # Command-line credentials override the keyfile only when all three
    # of server, auth id and auth password are supplied.
    if args.server and args.authpw and args.authid:
        key.server = args.server
        key.authid = args.authid
        key.authpw = args.authpw
        print("Creating authorization data from command line inputs")
    connection = encodedcc.ENC_Connection(key)
    print("Running on {}".format(connection.server))
    if args.update:
        print(
            "This is an UPDATE run! Data will be PATCHed or POSTed accordingly"
        )
    else:
        print("This is a dry run, no data will be changed")

    new_object = False
    if args.id:
        # --id is a read-only mode: the object is fetched and printed.
        GET_ONLY = True
        print("Taking id to get from --id")
        new_json = {}
        uuid_response = {}
        accession_response = {}
        try:
            id_response = encodedcc.get_ENCODE(args.id,
                                               connection,
                                               frame=args.frame)
        except Exception:
            # Exception (not a bare except) so Ctrl-C and sys.exit still
            # propagate; a failed lookup is treated as "object is new".
            id_response = {}
            new_object = True
    else:

        # Read the JSON document, closing a real file once it is consumed.
        if args.infile:
            with open(args.infile, 'r') as infile:
                new_json_string = infile.read()
        else:
            new_json_string = sys.stdin.read()

        new_json = json.loads(new_json_string)
        if args.debug:
            encodedcc.pprint_ENCODE(new_json)
        # Look up each identifier the document carries; a missing identifier
        # or a 404 response marks the object as (at least partly) new.
        if '@id' in new_json:
            id_response = encodedcc.get_ENCODE(new_json['@id'], connection)
            if id_response.get("code") == 404:
                id_response = {}
                new_object = True
        else:
            id_response = {}
            new_object = True
        if 'uuid' in new_json:
            uuid_response = encodedcc.get_ENCODE(new_json['uuid'], connection)
            if uuid_response.get("code") == 404:
                uuid_response = {}
                new_object = True
        else:
            uuid_response = {}
            new_object = True
        if 'accession' in new_json:
            accession_response = encodedcc.get_ENCODE(new_json['accession'],
                                                      connection)
            if accession_response.get("code") == 404:
                accession_response = {}
                new_object = True
        else:
            accession_response = {}
            new_object = True

        if new_object:
            print(
                "No identifier in new JSON object.  Assuming POST or PUT with auto-accessioning."
            )

    object_exists = False
    if id_response:
        object_exists = True
        print("Found matching @id:")
        encodedcc.pprint_ENCODE(id_response)
    if uuid_response:
        object_exists = True
        print("Found matching uuid:")
        encodedcc.pprint_ENCODE(uuid_response)
    if accession_response:
        object_exists = True
        print("Found matching accession")
        encodedcc.pprint_ENCODE(accession_response)

    # Warn when the different identifiers resolve to different objects.
    if id_response and uuid_response and (id_response != uuid_response):
        print("Existing id/uuid mismatch")
    if id_response and accession_response and (id_response !=
                                               accession_response):
        print("Existing id/accession mismatch")
    if uuid_response and accession_response and (uuid_response !=
                                                 accession_response):
        print("Existing uuid/accession mismatch")

    if new_object and object_exists:
        print(
            "Conflict:  At least one identifier already exists and at least one does not exist"
        )

    profiles = encodedcc.get_ENCODE("/profiles/", connection)
    supported_collections = list(profiles.keys())
    if "Dataset" not in supported_collections:
        supported_collections.append("Dataset")

    # --type overrides any @type found in the JSON document.
    type_list = new_json.pop('@type', [])
    if args.type:
        type_list = [args.type]
    if any(type_list):
        findit = False
        # Case-insensitive match; adopt the server's spelling of the type.
        for x in supported_collections:
            if x.lower() == type_list[0].lower():
                type_list = [x]
                findit = True
        if findit:
            if args.debug:
                print("Object will have type of", type_list[0])
        else:
            print(
                "Error! JSON object does not contain one of the supported types"
            )
            print("Provided type:", type_list[0])
            print(
                "Please either change the JSON file or define the type with the --type feature"
            )
            sys.exit(1)
    else:
        print("No type found for JSON object!")
        sys.exit(1)

    possible_collections = [x for x in type_list if x in supported_collections]
    if possible_collections:
        # collection = possible_collections[0] + 's/'
        collection = possible_collections[0]
    else:
        collection = []

    # Stays None when the document carries no @id, uuid or accession.
    identifier = None
    if '@id' in new_json:
        identifier = new_json.pop('@id')
    elif 'uuid' in new_json:
        if collection:
            identifier = '/' + collection + '/' + new_json['uuid'] + '/'
        else:
            identifier = '/' + new_json['uuid'] + '/'
    elif 'accession' in new_json:
        if collection:
            identifier = '/' + collection + '/' + new_json['accession'] + '/'
        else:
            identifier = '/' + new_json['accession'] + '/'

    if 'attachment' in new_json and 'href' not in new_json['attachment']:
        try:
            filename = new_json['attachment']['download']
        except (KeyError, TypeError):
            print("Must specify either href or filename for attachment",
                  file=sys.stderr)
            # Without a filename nothing below can work; stop cleanly
            # instead of failing later on an unbound name.
            sys.exit(1)
        print("Setting filename to %s" % (filename))

        mime_type = new_json['attachment'].get('type')
        if not mime_type:
            try:
                mime_type, _ = mimetypes.guess_type(filename)
                # Unpacking doubles as validation: it raises when the guess
                # is None or is not of the form "major/minor".
                _major, _minor = mime_type.split('/')
                #detected_type = magic.from_file(filename, mime=True)
                print("Detected mime type %s" % (mime_type))
            except Exception:
                print("Failed to detect mime type in file %s" % (filename),
                      file=sys.stderr)
        try:
            with open(filename, 'rb') as stream:
                print("opened")
                # b64encode returns bytes; decode so the URI holds the
                # base64 text itself rather than a "b'...'" repr.
                encoded = b64encode(stream.read()).decode('ascii')
        except OSError:
            print("Cannot open file %s" % (filename), file=sys.stderr)
        else:
            newvalue = {
                # Just echoes the given filename as the download name
                'download': filename,
                'type': mime_type,
                'href': 'data:%s;base64,%s' % (mime_type, encoded),
            }
            # Debug copy of the attachment payload.  The file is now
            # actually written and closed; previously the file object was
            # printed to stdout and the handle leaked.
            try:
                with open('tmp', 'w') as f:
                    print(newvalue, file=f)
            except OSError as err:
                print("Could not write debug copy to tmp: %s" % (err),
                      file=sys.stderr)
            new_json.update({'attachment': newvalue})  # add

    if object_exists:
        if args.force_put:
            if not GET_ONLY:
                print("Replacing existing object")
                if args.update:
                    e = encodedcc.replace_ENCODE(identifier, connection,
                                                 new_json)
                    print(e)
        else:
            if not GET_ONLY:
                print("PATCHing existing object")
                if args.update:
                    e = encodedcc.patch_ENCODE(identifier, connection,
                                               new_json)
                    print(e)
    elif new_object:
        if args.force_put:
            if not GET_ONLY:
                print("PUT'ing new object")
                if args.update:
                    if identifier is None:
                        # A PUT needs a target path to write to.
                        print(
                            "ERROR: Unable to PUT an object without an @id, uuid or accession"
                        )
                        sys.exit(1)
                    e = encodedcc.replace_ENCODE(identifier, connection,
                                                 new_json)
                    print(e)
        else:
            if not GET_ONLY:
                print("POST'ing new object")
                if not any(collection):
                    print(
                        "ERROR: Unable to POST to non-existing collection {}".
                        format(collection))
                    sys.exit(1)
                if args.update:
                    e = encodedcc.new_ENCODE(connection, collection, new_json)
                    print(e)

예제 #9

0

파일 보기

파일: one_ENCODE.py 프로젝트: jessie-wangjie/pyencoded-tools

def main():
    """Get, PATCH, PUT or POST a single ENCODE object described by JSON.

    With ``--id`` the object at that URI is fetched (using ``--frame``) and
    printed; nothing is written. Otherwise a JSON object is read from
    ``--infile`` (or stdin), each of its ``@id``/``uuid``/``accession``
    identifiers is looked up on the server, and the object is then:

    * PATCHed if an existing object was found (PUT with ``--force-put``);
    * POSTed to its collection if it looks new (PUT with ``--force-put``).

    ``--get-only`` suppresses every write. If the JSON has an ``attachment``
    without an ``href``, the file named by ``attachment.download`` is read
    and embedded as a base64 ``data:`` URI.

    Exits with status 1 when the object type is missing or unsupported, or
    when a write is requested but no target can be determined.
    """
    import argparse
    parser = argparse.ArgumentParser(
        description=__doc__, epilog=EPILOG,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )

    parser.add_argument('--infile', '-i',
                        help="File containing the JSON object as a JSON string.")
    parser.add_argument('--server',
                        help="Full URL of the server.")
    parser.add_argument('--key',
                        default='default',
                        help="The keypair identifier from the keyfile.  \
                        Default is --key=default")
    parser.add_argument('--keyfile',
                        default=os.path.expanduser("~/keypairs.json"),
                        help="The keypair file.  Default is --keyfile\
                        =%s" % (os.path.expanduser("~/keypairs.json")))
    parser.add_argument('--authid',
                        help="The HTTP auth ID.")
    parser.add_argument('--authpw',
                        help="The HTTP auth PW.")
    parser.add_argument('--force-put',
                        default=False,
                        action='store_true',
                        help="Force the object to be PUT rather than PATCHed.  \
                        Default is False.")
    parser.add_argument('--get-only',
                        default=False,
                        action='store_true',
                        help="Do nothing but get the object and print it.  \
                        Default is False.")
    parser.add_argument('--id',
                        help="URI for an object")
    parser.add_argument('--debug',
                        default=False,
                        action='store_true',
                        help="Print debug messages.  Default is False.")
    parser.add_argument('--frame',
                        help="define a frame to get back the JSON object, for use with --id. Default is frame=object",
                        default="object")
    parser.add_argument('--type',
                        help="the object's type")
    args = parser.parse_args()

    global DEBUG_ON
    DEBUG_ON = args.debug

    # --id is a read-only mode, so it implies --get-only.
    GET_ONLY = bool(args.get_only or args.id)

    key = encodedcc.ENC_Key(args.keyfile, args.key)
    connection = encodedcc.ENC_Connection(key)

    def lookup(path, **kwargs):
        """Return (response, failed); a failed GET yields ({}, True)."""
        try:
            return encodedcc.get_ENCODE(path, connection, **kwargs), False
        except Exception:
            return {}, True

    new_object = False
    id_response = {}
    uuid_response = {}
    accession_response = {}
    if args.id:
        print("Taking id to get from --id")
        new_json = {}
        id_response, new_object = lookup(args.id, frame=args.frame)
    else:
        if args.infile:
            with open(args.infile, 'r') as infile:
                new_json_string = infile.read()
        else:
            new_json_string = sys.stdin.read()

        new_json = json.loads(new_json_string)
        if '@id' in new_json:
            id_response, failed = lookup(new_json['@id'])
            new_object = new_object or failed
        if 'uuid' in new_json:
            uuid_response, failed = lookup(new_json['uuid'])
            new_object = new_object or failed
        if 'accession' in new_json:
            accession_response, failed = lookup(new_json['accession'])
            new_object = new_object or failed
        elif '@id' not in new_json and 'uuid' not in new_json:
            print("No identifier in new JSON object.  Assuming POST or PUT with auto-accessioning.")
            new_object = True
        elif not (id_response or uuid_response):
            # An identifier was supplied but nothing came back for it:
            # treat the object as new, without the misleading
            # "No identifier" message.
            new_object = True

    object_exists = False
    if id_response:
        object_exists = True
        print("Found matching @id:")
        encodedcc.pprint_ENCODE(id_response)
    if uuid_response:
        object_exists = True
        print("Found matching uuid:")
        encodedcc.pprint_ENCODE(uuid_response)
    if accession_response:
        object_exists = True
        print("Found matching accession")
        encodedcc.pprint_ENCODE(accession_response)

    if id_response and uuid_response and (id_response != uuid_response):
        print("Existing id/uuid mismatch")
    if id_response and accession_response and (id_response != accession_response):
        print("Existing id/accession mismatch")
    if uuid_response and accession_response and (uuid_response != accession_response):
        print("Existing uuid/accession mismatch")

    if new_object and object_exists:
        print("Conflict:  At least one identifier already exists and at least one does not exist")

    profiles = encodedcc.get_ENCODE("/profiles/", connection)
    supported_collections = list(profiles.keys())
    if "Dataset" not in supported_collections:
        supported_collections.append("Dataset")

    type_list = new_json.pop('@type', [])
    if isinstance(type_list, str):
        # A bare string would otherwise be indexed character by character.
        type_list = [type_list]
    if args.type:
        type_list = [args.type]
    if not any(type_list):
        print("No type found for JSON object!")
        sys.exit(1)

    # Case-insensitive match against the server's profile names; the
    # server's spelling is the one used from here on.
    wanted = type_list[0].lower()
    collection = None
    for name in supported_collections:
        if name.lower() == wanted:
            collection = name
    if collection is None:
        print("Error! JSON object does not contain one of the supported types")
        print("Provided type:", type_list[0])
        print("Please either change the JSON file or define the type with the --type feature")
        sys.exit(1)
    if args.debug:
        print("Object will have type of", collection)

    # Work out the URI to PUT/PATCH to; stays None when the JSON carries no
    # identifier (only a POST is possible then).
    identifier = None
    if '@id' in new_json:
        identifier = new_json.pop('@id')
    else:
        prefix = '/' + collection + '/' if collection else '/'
        for id_key in ('uuid', 'accession'):
            if id_key in new_json:
                identifier = prefix + new_json[id_key] + '/'
                break

    if 'attachment' in new_json and 'href' not in new_json['attachment']:
        attachment = new_json['attachment']
        filename = attachment.get('download')
        if filename is None:
            print("Must specify either href or filename for attachment", file=sys.stderr)
        else:
            print("Setting filename to %s" % (filename))
            mime_type = attachment.get('type')
            if not mime_type:
                mime_type, _encoding = mimetypes.guess_type(filename)
                if mime_type is None:
                    # Best effort, as before: carry on with an unknown type.
                    print("Failed to detect mime type in file %s" % (filename), file=sys.stderr)
                else:
                    print("Detected mime type %s" % (mime_type))
            try:
                with open(filename, 'rb') as stream:
                    print("opened")
                    # b64encode returns bytes; decode so the data URI does
                    # not contain a literal b'...' wrapper.
                    payload = b64encode(stream.read()).decode('ascii')
            except OSError:
                print("Cannot open file %s" % (filename), file=sys.stderr)
            else:
                newvalue = {
                    'download': filename,  # Just echoes the given filename as the download name
                    'type': mime_type,
                    'href': 'data:%s;base64,%s' % (mime_type, payload)
                }
                print(newvalue)
                new_json.update({'attachment': newvalue})  # add

    if GET_ONLY or not (object_exists or new_object):
        return

    if object_exists or args.force_put:
        if identifier is None:
            print("ERROR: Cannot PUT or PATCH without an @id, uuid or accession in the JSON object",
                  file=sys.stderr)
            sys.exit(1)
        if args.force_put:
            if object_exists:
                print("Replacing existing object")
            else:
                print("PUT'ing new object")
            e = encodedcc.replace_ENCODE(identifier, connection, new_json)
        else:
            print("Patching existing object")
            e = encodedcc.patch_ENCODE(identifier, connection, new_json)
        print(e)
    else:
        print("POST'ing new object")
        if not collection:
            print("ERROR: Unable to POST to non-existing collection {}".format(collection))
            sys.exit(1)
        e = encodedcc.new_ENCODE(connection, collection, new_json)
        print(e)