Example #1
def set_mf_name(namespace):
    """Find assets under the given namespace that have no mf-name metadata
    and copy each asset's name into the mf-name document."""

    # Create a mediaflux connection
    cxn = mf_connect.connect()

    # Query for all assets at or below the namespace that have no mf-name value
    w = mfclient.XmlStringWriter('args')
    w.add("where", "namespace>=" + namespace + " and mf-name hasno value")
    w.add("size", "infinity")
    r = cxn.execute("asset.query", w.doc_text())

    for a in r.values("id"):
        # Fetch the asset's name
        nameq = mfclient.XmlStringWriter('args')
        nameq.add("where", "id=" + a)
        nameq.add("action", "get-name")
        name = cxn.execute("asset.query", nameq.doc_text())

        if isinstance(name.value('name'), unicode):
            # Skip names that come back as unicode (they may contain non-ASCII characters)
            print "skip " + name.value('name')
        else:
            # 'escape' is assumed to be xml.sax.saxutils.escape (XML entity escaping)
            assetname = escape(name.value("name"))
            print name.value("name")

            # Write the name into the asset's mf-name metadata document
            nameset = mfclient.XmlStringWriter('args')
            nameset.add("id", a)
            nameset.push("meta")
            nameset.push("mf-name")
            nameset.add("name", assetname)
            cxn.execute("asset.set", nameset.doc_text())
Example #2
def create(doctype, description, elements):
    """Create (or update) a document type whose elements are all simple strings."""
    # Create a mediaflux connection
    cxn = mf_connect.connect()

    # dt = mfclient.XmlStringWriter('args')
    # dt.add("type",doctype)

    # match = True
    # dtexists = cxn.execute("asset.doc.type.exists",dt.doc_text())
    # if dtexists.boolean_value("exists"):
    #     mfDType = cxn.execute("asset.doc.type.describe",dt.doc_text())
    #     for element in elements:
    #         dte = mfDType.element("type/definition/" + element)
    #         if dte is None:
    #             match = False

    # if match:
    w = mfclient.XmlStringWriter('args')

    w.add("description",description)
    w.add("create", "true")
    w.add("tag","PROPAGABLE")
    w.add("type",doctype)

    w.push("definition")
    for element in elements:
        w.add("element", "", attributes={"type": "string", "name": str(element)})
    w.pop()

    print w.doc_text()

    cxn.execute("asset.doc.type.update",w.doc_text())
Example #3
timestr = time.strftime("%Y%m%d-%H%M%S")

# Start a new CSV for the per-project summary and write its header row
with open(projSummary + timestr + ".csv", 'ab') as f:
    header = ["project", "allocation", "usage"]
    writer = csv.writer(f)
    writer.writerow(header)

# Start a new CSV for the per-store summary and write its header row
with open(storeSummary + timestr + ".csv", 'ab') as f:
    header = ["Store", "Size", "Used", "Free"]
    writer = csv.writer(f)
    writer.writerow(header)

# Create mediaflux connection
cxn = mf_connect.connect()

try:
    projsList = cxn.execute("vicnode.project.list")

    print projsList

    for proj in projsList:
        if proj.value() == "proj-cryoem_instrument_data-1128.4.51":
            namespace = "/projects/cryo-em/" + proj.value()
            projDetailsQuery = mfclient.XmlStringWriter('args')
            projDetailsQuery.add("namespace", namespace)
            projDetails = cxn.execute("asset.namespace.describe",
                                      projDetailsQuery.doc_text())
            allocation = projDetails.element(
                "namespace/quota/inherited/allocation")
Example #4
def omeka_upload(mf_id, mf_doctype, OMEKA_ENDPOINT, OMEKA_APIKEY,
                 OMEKA_COLLECTION_ID, OMEKA_ITEM_ID):
    """Push a mediaflux asset's metadata document to Omeka, updating an
    existing item or creating a new one (and, for new items, registering the
    asset content as an Omeka file)."""
    # connect to mediaflux (before the try block so the finally clause can
    # always call cxn.disconnect())
    cxn = mf_connect.connect()
    try:
        # get the asset's metadata document of the given type
        w1 = mfclient.XmlStringWriter('args')
        w1.add('id', mf_id)
        ae = cxn.execute('asset.get', w1.doc_text()).element('asset')
        doc = ae.element('meta/' + mf_doctype)

        # create omeka item
        w2 = mfclient.XmlStringWriter('args')
        w2.add('endpoint', OMEKA_ENDPOINT)
        w2.add('api-key', OMEKA_APIKEY)
        w2.add('collection', OMEKA_COLLECTION_ID)
        # mf metadata -> omeka metadata
        item_type = doc.value('type')
        if item_type:
            w2.push('item_type')
            w2.add('name', item_type.title())
            w2.pop()
        # Map simple Dublin Core style fields from the mediaflux document to
        # Omeka element texts (field name capitalised for the Omeka element name)
        for field in ('title', 'subject', 'description', 'creator', 'publisher',
                      'date', 'contributor', 'rights', 'format'):
            value = doc.value(field)
            if value:
                w2.push('element_text')
                w2.push('element')
                w2.add('name', field.title())
                w2.pop()
                w2.add('text', value)
                w2.pop()
        if OMEKA_ITEM_ID:
            # an existing item id was supplied, so update that item
            w2.add('id', OMEKA_ITEM_ID)
            re = cxn.execute('omeka.item.update', w2.doc_text())
            item_id = re.value('item/@id')
            print("updated omeka item: " + item_id)
        else:
            re = cxn.execute('omeka.item.create', w2.doc_text())
            item_id = re.value('item/@id')
            print("created omeka item: " + item_id)

            # create omeka file
            w3 = mfclient.XmlStringWriter('args')
            w3.add('endpoint', OMEKA_ENDPOINT)
            w3.add('api-key', OMEKA_APIKEY)
            w3.add('item', item_id)
            w3.add('id', mf_id)
            re = cxn.execute('omeka.file.create', w3.doc_text())
            file_id = re.value('file/@id')
            print("created omeka file: " + file_id)

    finally:
        cxn.disconnect()
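A hedged usage sketch for omeka_upload; every argument below is a placeholder rather than a value from the original source.

# Hypothetical call: create a new Omeka item for one asset and attach its content
omeka_upload(
    '12345',                          # mediaflux asset id (placeholder)
    'proj-demo:glass-slide',          # metadata document type (placeholder)
    'https://omeka.example.org/api',  # Omeka endpoint (placeholder)
    'REPLACE_WITH_API_KEY',           # Omeka API key (placeholder)
    '3',                              # Omeka collection id (placeholder)
    None)                             # no item id, so a new item is created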
Example #5
def create_advanced(doctype, description, elements):
    """Create (or update) a document type from a list of element definitions,
    where each element is a dict of attributes plus optional description and
    instructions child elements."""
    # Create a mediaflux connection
    cxn = mf_connect.connect()

    # dt = mfclient.XmlStringWriter('args')
    # dt.add("type",doctype)

    # match = True
    # dtexists = cxn.execute("asset.doc.type.exists",dt.doc_text())
    # if dtexists.boolean_value("exists"):
    #     mfDType = cxn.execute("asset.doc.type.describe",dt.doc_text())
    #     for element in elements:
    #         dte = mfDType.element("type/definition/" + element)
    #         if dte is None:
    #             match = False

    # if match:
    w = mfclient.XmlStringWriter('args')

    w.add("description", description)
    w.add("create", "true")
    w.add("tag", "PROPAGABLE")
    print str(doctype)
    w.add("type", str(doctype))

    w.push("definition")
    for element in elements:
        attribs = {}
        subAttribs = {}
        # w.add("element", "", attributes={"type": str(element['type']), "name": str(element['name'])})
        for part in element:
            # print str(part)
            # idURLS.update({"icon": iconURL + "&id=" + a})
            # attribs.update({part:element[part]})
            if str(part) == 'description':
                # print "skip"
                subAttribs.update({part:str(element[part])})
            elif str(part) == 'MF_required':
                print "skip"
            elif str(part) == 'instructions':
                subAttribs.update({str(part):str(element[part])})
            # elif str(part) == 'enumerated_values':
            #     if str(element[part]) != 'nan':
            #         attribs.update({str(part): str(element[part])})
            # elif str(part) == 'default':
            #     if str(element[part]) != 'nan':
            #         subAttribs.update({str(part): str(element[part])})
            else :
                print part
                print element[part]
                if str(element[part]) != 'nan':
                    attribs.update({str(part): str(element[part])})
                # attribs.update({str(part): str(element[part])})
        print attribs
        print subAttribs
        w.push ("element", attributes=attribs)
        for sAtrr in subAttribs:
            w.add (sAtrr,subAttribs[sAtrr])
        w.pop()
    # w.push("element","",attribs)
    # w.add(subAttribs)
    # w.pop()
    # w.add("element", "", attributes=attribs)
    w.pop()

    print w.doc_text()

    cxn.execute("asset.doc.type.update", w.doc_text())
Example #6
def main():
    """Page through every asset under nameSpace, then try to match each asset
    name against MELU identifiers read from a spreadsheet, writing matched,
    unmatched and orphaned records to CSV files."""
    results = {}
    cxn = mf_connect.connect()
    rind = 0

    try:
        idx = 1
        page_size = 20000
        completed = False

        # Page through the query results using the server-side cursor
        while not completed:
            w = mfclient.XmlStringWriter('args')
            w.add('size', page_size)
            w.add('idx', idx)
            w.add("where", "namespace>=" + nameSpace)
            w.add('action', 'get-value')
            w.add('xpath', 'name', {'ename': 'name'})
            w.add('xpath', 'namespace', {'ename': 'namespace'})
            w.add('xpath', 'type', {'ename': 'type'})

            re = cxn.execute('asset.query', args=w.doc_text())

            # process the query results here
            aes = re.elements('asset')
            if aes:
                # print "\nAssets paths in current page:"
                for ae in aes:
                    line = {}
                    # print ae.attribute('id')

                    # Skip names that come back as unicode (may contain non-ASCII)
                    if isinstance(ae.value('name'), unicode):
                        print "skip " + ae.attribute('id') + "," + ae.value('name') + "," + ae.value('namespace') + "," + ae.value('type')
                    else:
                        line.update({'id': ae.attribute('id')})
                        line.update({'name': ae.value('name')})
                        line.update({'namespace': ae.value('namespace')})
                        line.update({'type': ae.value('type')})
                        results.update({rind: line})
                        rind = rind + 1

            # Advance the server-side cursor; stop when nothing remains
            remaining = re.int_value('cursor/remaining')
            completed = remaining == 0
            idx = re.int_value('cursor/next')
        matched_results = {}
        spreadsheetorphans = {}
        print len(results)

        # Strip out all of the a IDs
        # pandas read_excel has no 'index_row' option; the default header row is used
        full_data = pd.read_excel(spreadsheet, asheet, na_values=['NA'])
        for index, row in full_data.iterrows():
            if not is_number(row[assetNameCol]):
                # id digits only, excluding the trailing letter
                mid3 = row[assetNameCol][5:11]
                match = 0
                # the id must not be embedded in a longer run of digits
                id_match = r"[^0-9]" + reg.escape(str(mid3)) + r"[^0-9]"
                melu_match = r"^MELU"
                for i in results.keys():
                    if reg.search(melu_match, results[i]['name']) and reg.search(id_match, results[i]['name']):
                    # if mid3 in results[i]['name']:
                        print id_match
                        print results[i]
                        line = {}
                        line.update({'meluID': row[assetNameCol]})
                        line.update({'id': results[i]['id']})
                        line.update({'namespace': results[i]['namespace']})
                        line.update({'name': results[i]['name']})
                        line.update({'type': results[i]['type']})
                        matched_results.update({i: line})
                        results.pop(i)
                        match = 1
                if match == 0:
                    # No match: retry with any leading zeros stripped from the id
                    for i in results.keys():
                        mid3 = mid3.lstrip("0")
                        id_match = r"[^0-9]" + reg.escape(str(mid3)) + r"[^0-9]"
                        if reg.search(melu_match, results[i]['name']) and reg.search(id_match, results[i]['name']):
                        # if mid3 in results[i]['name']:
                            print id_match
                            print results[i]
                            line = {}
                            line.update({'meluID': row[assetNameCol]})
                            line.update({'id': results[i]['id']})
                            line.update({'namespace': results[i]['namespace']})
                            line.update({'name': results[i]['name']})
                            line.update({'type': results[i]['type']})
                            matched_results.update({i: line})
                            results.pop(i)
                            match = 1
                    if match == 0:
                        # Still no match: record the spreadsheet row as an orphan
                        line = {}
                        line.update({'meluID': row[assetNameCol]})
                        spreadsheetorphans.update({index: line})

        print len(results)

        # Strip out all of the abcd... IDs
        # as above, read with pandas defaults ('index_row' is not a read_excel option)
        full_data2 = pd.read_excel(spreadsheet, abcsheet, na_values=['NA'])
        for index, row in full_data2.iterrows():
            if not is_number(row[assetNameCol]) and row[assetNameCol] != assetNameCol:
                # id including the trailing letter
                mid4 = row[assetNameCol][5:12]
                match = 0
                id_match = r"[^0-9]" + reg.escape(str(mid4)) + r"[^0-9]"
                melu_match = r"^MELU"
                for i in results.keys():
                    if reg.search(melu_match, results[i]['name']) and reg.search(id_match, results[i]['name']):
                    # if mid4 in results[i]['name']:
                        print id_match
                        print results[i]
                        line = {}
                        line.update({'meluID': row[assetNameCol]})
                        line.update({'id': results[i]['id']})
                        line.update({'namespace': results[i]['namespace']})
                        line.update({'name': results[i]['name']})
                        line.update({'type': results[i]['type']})
                        matched_results.update({i: line})
                        results.pop(i)
                        match = 1
                if match == 0:
                    for i in results.keys():
                        mid4 = mid4.lstrip("0")
                        id_match = r"[^0-9]" + reg.escape(str(mid4)) + r"[^0-9]"
                        if reg.search(melu_match, results[i]['name']) and reg.search(id_match, results[i]['name']):
                        # if mid4 in results[i]['name']:
                            print id_match
                            print results[i]
                            line = {}
                            line.update({'meluID': row[assetNameCol]})
                            line.update({'id': results[i]['id']})
                            line.update({'namespace': results[i]['namespace']})
                            line.update({'name': results[i]['name']})
                            line.update({'type': results[i]['type']})
                            matched_results.update({i: line})
                            results.pop(i)
                            match = 1
                    if match == 0:
                        line = {}
                        line.update({'meluID': row[assetNameCol]})
                        spreadsheetorphans.update({index: line})

        print len(results)
        csv_file = "melu_spec_not_a_match_" + timestr + ".csv"
        WriteDictToCSV(csv_file, ['id', 'namespace', 'name', 'type'], results)
        csv_matched_file = "melu_spec_matched_" + timestr + ".csv"
        WriteDictToCSV(csv_matched_file, ['meluID', 'id', 'namespace', 'name', 'type'], matched_results)
        spreadsheetorphans_file = "melu_spec_orphaned_" + timestr + ".csv"
        WriteDictToCSV(spreadsheetorphans_file, ['meluID'], spreadsheetorphans)


    finally:
        cxn.disconnect()
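This snippet relies on a few helpers and module-level names that are not shown (is_number, WriteDictToCSV, reg, pd, nameSpace, spreadsheet and the sheet/column names). A minimal sketch of what the two helpers are assumed to look like; the originals are not part of this example.

# Assumed helper implementations (not from the original source)
import csv
import re as reg
import pandas as pd

def is_number(s):
    # True if the cell value parses as a number (so it is not a MELU-style id)
    try:
        float(s)
        return True
    except (TypeError, ValueError):
        return False

def WriteDictToCSV(csv_file, csv_columns, dict_data):
    # Write one row per entry of a {key: {column: value}} dictionary
    with open(csv_file, 'wb') as f:
        writer = csv.DictWriter(f, fieldnames=csv_columns)
        writer.writeheader()
        for key in dict_data:
            writer.writerow(dict_data[key])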