def set_mf_name(namespace): # Create a mediaflux connection cxn = mf_connect.connect() w = mfclient.XmlStringWriter('args') w.add("where", "namespace>=" + namespace + " and mf-name hasno value" "") w.add("size", "infinity") # mfcommand = "asset.query :where namespace>="+namespace+" and (xpath(asset/name) contians"+assetContains+")" # print w.doc_text() r = cxn.execute("asset.query", w.doc_text()) # print r for a in r.values("id"): # print a nameq = mfclient.XmlStringWriter('args') nameq.add("where", "id=" + a) nameq.add("action", "get-name") # assetname = "" name = cxn.execute("asset.query", nameq.doc_text()) if isinstance(name.value('name'), unicode): print "skip " + name.value('name') else: assetname = name.value("name") assetname = escape(assetname) print name.value("name") nameset = mfclient.XmlStringWriter('args') nameset.add("id", a) nameset.push("meta") nameset.push("mf-name") # nameset.add("name",name.value("name")) nameset.add("name", assetname) # print nameset.doc_text() cxn.execute("asset.set", nameset.doc_text())
def create(doctype,description,elements): # Create a mediaflux connection cxn = mf_connect.connect() # dt = mfclient.XmlStringWriter('args') # dt.add("type",doctype) # match = True # dtexists = cxn.execute("asset.doc.type.exists",dt.doc_text()) # if dtexists.boolean_value("exists"): # mfDType = cxn.execute("asset.doc.type.describe",dt.doc_text()) # for element in elements: # dte = mfDType.element("type/definition/" + element) # if dte is None: # match = False # if match: w = mfclient.XmlStringWriter('args') w.add("description",description) w.add("create", "true") w.add("tag","PROPAGABLE") w.add("type",doctype) w.push("definition") for element in elements: w.add("element", "", attributes={"type": "string", "name": str(element)}) w.pop() print w.doc_text() cxn.execute("asset.doc.type.update",w.doc_text())
# --- project/store usage summary script (fragment: the try-block continues
# --- beyond this chunk, its except/finally is not visible here) ---
timestr = time.strftime("%Y%m%d-%H%M%S")  # timestamp shared by all output files
with open(projSummary + timestr + ".csv", 'ab') as f:  # 'ab' = Py2 binary append for csv
    header = ["project", "allocation", "usage"]
    writer = csv.writer(f)
    writer.writerow(header)
    f.close()  # redundant: the with-statement already closes f
with open(storeSummary + timestr + ".csv", 'ab') as f:
    header = ["Store", "Size", "Used", "Free"]
    writer = csv.writer(f)
    writer.writerow(header)
    f.close()  # redundant, as above
# Create mediaflux connection
cxn = mf_connect.connect()
try:
    projsList = cxn.execute("vicnode.project.list")
    print projsList
    for proj in projsList:
        # NOTE(review): hard-coded filter processes exactly one project -
        # looks like a one-off/debug restriction; confirm before reuse.
        if proj.value() == "proj-cryoem_instrument_data-1128.4.51":
            namespace = "/projects/cryo-em/" + proj.value()
            projDetailsQuery = mfclient.XmlStringWriter('args')
            projDetailsQuery.add("namespace", namespace)
            projDetails = cxn.execute("asset.namespace.describe", projDetailsQuery.doc_text())
            # Inherited quota allocation for the namespace; processing of this
            # value happens past the end of this visible fragment.
            allocation = projDetails.element(
                "namespace/quota/inherited/allocation")
def _add_element_text(w, label, text):
    """Append one omeka element_text entry (element name + text) to writer *w*."""
    w.push('element_text')
    w.push('element')
    w.add('name', label)
    w.pop()
    w.add('text', text)
    w.pop()


def omeka_upload(mf_id, mf_doctype, OMEKA_ENDPOINT, OMEKA_APIKEY, OMEKA_COLLECTION_ID, OMEKA_ITEM_ID):
    """Publish a Mediaflux asset's metadata and content to an Omeka site.

    Reads the *mf_doctype* metadata document from asset *mf_id*, maps its
    Dublin Core fields onto an Omeka item (updating the item when
    *OMEKA_ITEM_ID* is given, creating a new one otherwise), then attaches
    the asset content to the item as an Omeka file.

    :param mf_id: Mediaflux asset id.
    :param mf_doctype: metadata document path under ``meta/`` to read.
    :param OMEKA_ENDPOINT: Omeka API endpoint URL.
    :param OMEKA_APIKEY: Omeka API key.
    :param OMEKA_COLLECTION_ID: target Omeka collection id.
    :param OMEKA_ITEM_ID: existing item id to update, or falsy to create.
    """
    # Connect BEFORE the try so a failed connect cannot leave 'cxn' unbound
    # when the finally-block runs (the original bound it inside the try).
    cxn = mf_connect.connect()
    try:
        # get asset metadata document
        w1 = mfclient.XmlStringWriter('args')
        w1.add('id', mf_id)
        ae = cxn.execute('asset.get', w1.doc_text()).element('asset')
        doc = ae.element('meta/' + mf_doctype)

        # build the omeka item arguments
        w2 = mfclient.XmlStringWriter('args')
        w2.add('endpoint', OMEKA_ENDPOINT)
        w2.add('api-key', OMEKA_APIKEY)
        w2.add('collection', OMEKA_COLLECTION_ID)

        # mf metadata -> omeka metadata
        item_type = doc.value('type')
        if item_type:
            w2.push('item_type')
            w2.add('name', item_type.title())
            w2.pop()

        # Dublin Core fields: copy each one that is present in the document.
        # Element label is the capitalized field name ('title' -> 'Title').
        for field in ('title', 'subject', 'description', 'creator',
                      'publisher', 'date', 'contributor', 'rights', 'format'):
            value = doc.value(field)
            if value:
                _add_element_text(w2, field.capitalize(), value)

        # BUG FIX: the update-vs-create decision used to test
        # OMEKA_COLLECTION_ID, but an item update needs the target item id
        # (the value actually written below), so test OMEKA_ITEM_ID.
        if OMEKA_ITEM_ID:
            w2.add('id', OMEKA_ITEM_ID)
            re = cxn.execute('omeka.item.update', w2.doc_text())
            item_id = re.value('item/@id')
            print("updated omeka item: " + item_id)
        else:
            re = cxn.execute('omeka.item.create', w2.doc_text())
            item_id = re.value('item/@id')
            print("created omeka item: " + item_id)

        # attach the asset content to the item as an omeka file
        w3 = mfclient.XmlStringWriter('args')
        w3.add('endpoint', OMEKA_ENDPOINT)
        w3.add('api-key', OMEKA_APIKEY)
        w3.add('item', item_id)
        w3.add('id', mf_id)
        re = cxn.execute('omeka.file.create', w3.doc_text())
        file_id = re.value('file/@id')
        print("created omeka file: " + file_id)
    finally:
        cxn.disconnect()
def create_advanced(doctype, description, elements): cxn = mf_connect.connect() # dt = mfclient.XmlStringWriter('args') # dt.add("type",doctype) # match = True # dtexists = cxn.execute("asset.doc.type.exists",dt.doc_text()) # if dtexists.boolean_value("exists"): # mfDType = cxn.execute("asset.doc.type.describe",dt.doc_text()) # for element in elements: # dte = mfDType.element("type/definition/" + element) # if dte is None: # match = False # if match: w = mfclient.XmlStringWriter('args') w.add("description", description) w.add("create", "true") w.add("tag", "PROPAGABLE") print str(doctype) w.add("type", str(doctype)) w.push("definition") for element in elements: attribs = {} subAttribs = {} # w.add("element", "", attributes={"type": str(element['type']), "name": str(element['name'])}) for part in element: # print str(part) # idURLS.update({"icon": iconURL + "&id=" + a}) # attribs.update({part:element[part]}) if str(part) == 'description': # print "skip" subAttribs.update({part:str(element[part])}) elif str(part) == 'MF_required': print "skip" elif str(part) == 'instructions': subAttribs.update({str(part):str(element[part])}) # elif str(part) == 'enumerated_values': # if str(element[part]) != 'nan': # attribs.update({str(part): str(element[part])}) # elif str(part) == 'default': # if str(element[part]) != 'nan': # subAttribs.update({str(part): str(element[part])}) else : print part print element[part] if str(element[part]) != 'nan': attribs.update({str(part): str(element[part])}) # attribs.update({str(part): str(element[part])}) print attribs print subAttribs w.push ("element", attributes=attribs) for sAtrr in subAttribs: w.add (sAtrr,subAttribs[sAtrr]) w.pop() # w.push("element","",attribs) # w.add(subAttribs) # w.pop() # w.add("element", "", attributes=attribs) w.pop() print w.doc_text() cxn.execute("asset.doc.type.update", w.doc_text())
def main():
    """Match MELU specimen ids from a spreadsheet against Mediaflux assets.

    Pages through every asset under the global ``nameSpace``, then scans two
    spreadsheet sheets (``asheet``: ids without a trailing letter, chars
    [5:11]; ``abcsheet``: ids with a trailing letter, chars [5:12]) looking
    for each id embedded in a MELU asset name.  Matches are removed from the
    asset pool as they are claimed.  Writes three timestamped CSVs:
    unmatched assets, matched pairs, and spreadsheet-only orphans.

    Relies on module globals: nameSpace, spreadsheet, asheet, abcsheet,
    assetNameCol, timestr, helpers is_number()/WriteDictToCSV(), plus the
    'reg' (regex) and 'pd' (pandas) module aliases.
    """
    results = {}  # running index -> {id, name, namespace, type} per asset
    cxn = mf_connect.connect()
    rind = 0  # next key for results
    try:
        # --- phase 1: page through every asset under nameSpace ---
        idx=1
        page_size=20000
        completed=False
        while not completed:
            w = mfclient.XmlStringWriter('args')
            w.add('size', page_size)
            w.add('idx', idx)
            w.add("where", "namespace>=" + nameSpace)
            # get-value with xpath projections: one <asset> element per hit
            w.add('action','get-value')
            w.add('xpath','name',{'ename':'name'})
            w.add('xpath', 'namespace', {'ename': 'namespace'})
            w.add('xpath', 'type', {'ename': 'type'})
            re = cxn.execute('asset.query', args=w.doc_text())
            # process the query results here
            aes = re.elements('asset')
            if aes:
                for ae in aes:
                    line = {}
                    # Py2: unicode (non-ascii) names are skipped to avoid
                    # encoding failures downstream; they are only logged.
                    if isinstance(ae.value('name'), unicode):
                        print "skip "+ae.attribute('id')+","+ae.value('name')+","+ae.value('namespace')+","+ae.value('type')
                    else:
                        line.update({'id': ae.attribute('id')})
                        line.update({'name': ae.value('name')})
                        line.update({'namespace': ae.value('namespace')})
                        line.update({'type': ae.value('type')})
                        results.update({rind: line})
                        rind = rind + 1
            # advance the server-side cursor until no pages remain
            remaining = re.int_value('cursor/remaining')
            completed = remaining==0
            idx = re.int_value('cursor/next')

        matched_results = {}      # results-key -> matched row (+ meluID)
        spreadsheetorphans = {}   # spreadsheet row index -> unmatched meluID
        print len(results)

        # --- phase 2: sheet of ids WITHOUT the trailing letter ---
        # Strip out all of the a IDs
        full_data = pd.read_excel(spreadsheet, asheet, index_row=1, na_values=['NA'])
        for index, row in full_data.iterrows():
            if not is_number(row[assetNameCol]): #and row[assetNameCol] != assetNameCol:
                # When wanting to exclude the trailing letter: chars [5:11]
                mid3 = row[assetNameCol][5:11]
                match = 0
                # id must appear in the asset name with non-digits on both
                # sides, and the name must start with MELU
                id_match = r"[^0-9]" + reg.escape(str(mid3)) + r"[^0-9]"
                melu_match = r"^MELU"
                # NOTE: popping from results while iterating results.keys()
                # is safe in Python 2 only (keys() returns a list copy).
                for i in results.keys():
                    if reg.search(melu_match, results[i]['name']) and reg.search(id_match, results[i]['name']):
                        print id_match
                        print results[i]
                        line = {}
                        line.update({'meluID': row[assetNameCol]})
                        line.update({'id': results[i]['id']})
                        line.update({'namespace': results[i]['namespace']})
                        line.update({'name': results[i]['name']})
                        line.update({'type': results[i]['type']})
                        matched_results.update({i: line})
                        results.pop(i)  # claimed: remove from the pool
                        match = 1
                if match == 0:
                    # second pass: retry with leading zeros stripped from the id
                    for i in results.keys():
                        mid3 = mid3.lstrip("0")  # idempotent; re-run each iteration
                        id_match = r"[^0-9]" + reg.escape(str(mid3)) + r"[^0-9]"
                        if reg.search(melu_match, results[i]['name']) and reg.search(id_match, results[i]['name']):
                            print id_match
                            print results[i]
                            line = {}
                            line.update({'meluID': row[assetNameCol]})
                            line.update({'id': results[i]['id']})
                            line.update({'namespace': results[i]['namespace']})
                            line.update({'name': results[i]['name']})
                            line.update({'type': results[i]['type']})
                            matched_results.update({i: line})
                            results.pop(i)
                            match = 1
                if match == 0:
                    # no asset matched this spreadsheet id at all
                    line = {}
                    line.update({'meluID': row[assetNameCol]})
                    spreadsheetorphans.update({index: line})
        print len(results)

        # --- phase 3: sheet of ids WITH the trailing letter ---
        # Strip out all of the abcd... IDs
        full_data2 = pd.read_excel(spreadsheet, abcsheet, index_row=1, na_values=['NA'])
        for index, row in full_data2.iterrows():
            if not is_number(row[assetNameCol]) and row[assetNameCol] != assetNameCol:
                # When wanting to include the trailing letter: chars [5:12]
                mid4 = row[assetNameCol][5:12]
                match = 0
                id_match = r"[^0-9]" + reg.escape(str(mid4)) + r"[^0-9]"
                melu_match = r"^MELU"
                for i in results.keys():
                    if reg.search(melu_match, results[i]['name']) and reg.search(id_match, results[i]['name']):
                        print id_match
                        print results[i]
                        line = {}
                        line.update({'meluID': row[assetNameCol]})
                        line.update({'id': results[i]['id']})
                        line.update({'namespace': results[i]['namespace']})
                        line.update({'name': results[i]['name']})
                        line.update({'type': results[i]['type']})
                        matched_results.update({i: line})
                        results.pop(i)
                        match = 1
                if match == 0:
                    # retry with leading zeros stripped, as in phase 2
                    for i in results.keys():
                        mid4 = mid4.lstrip("0")
                        id_match = r"[^0-9]" + reg.escape(str(mid4)) + r"[^0-9]"
                        if reg.search(melu_match, results[i]['name']) and reg.search(id_match, results[i]['name']):
                            print id_match
                            print results[i]
                            line = {}
                            line.update({'meluID': row[assetNameCol]})
                            line.update({'id': results[i]['id']})
                            line.update({'namespace': results[i]['namespace']})
                            line.update({'name': results[i]['name']})
                            line.update({'type': results[i]['type']})
                            matched_results.update({i: line})
                            results.pop(i)
                            match = 1
                if match == 0:
                    line = {}
                    line.update({'meluID': row[assetNameCol]})
                    spreadsheetorphans.update({index: line})
        print len(results)

        # --- phase 4: dump the three result sets to timestamped CSVs ---
        csv_file = "melu_spec_not_a_match_" + timestr + ".csv"
        WriteDictToCSV(csv_file, ['id','namespace','name','type'], results)
        csv_matched_file = "melu_spec_matched_" + timestr + ".csv"
        WriteDictToCSV(csv_matched_file, ['meluID','id','namespace','name','type'], matched_results)
        spreadsheetorphans_file = "melu_spec_orphaned_" + timestr + ".csv"
        WriteDictToCSV(spreadsheetorphans_file, ['meluID'], spreadsheetorphans)
    finally:
        cxn.disconnect()