def doGranuleCheckwithRecords(filename, UMM='1.10', outputform='CSV', outfilename="result.csv"):
    """Fetch a granule metadata record by URL and run the UMM checks on it.

    :param filename: URL of the granule metadata record (the granule id is
        taken from the second-to-last path component)
    :param UMM: UMM schema version passed to the granule checker
    :param outputform: unused here; kept for signature compatibility
    :param outfilename: unused here; kept for signature compatibility
    :return: OrderedDict of check results for the last <Granule> element,
        or None when the record contains no <Granule> elements
    """
    print(filename)
    gid = filename.split('/')[-2]
    # Retrieve the metadata record from the API and parse it as XML.
    api = urllib.request.urlopen(filename)
    xml = ElementTree.parse(api)
    root_element = xml.getroot()
    ck = GranuleChecker.Checker(UMM)
    # BUG FIX: previously `od` was only bound inside the loop, so a record
    # with no <Granule> elements raised NameError at `return od`.
    od = None
    for granule in root_element.iter('Granule'):
        metadata = XmlDictConfig(granule)
        result = ck.checkAllJSON(metadata)
        od = toOrderedDict(result, metadata, filename, 'Granule', gid)
    return od
def _get_search_results(self, url, limit, **kwargs):
    """
    Search the CMR granules

    :param limit: limit of the number of results
    :param kwargs: search parameters
    :return: list of results (<Instance of Result>)
    """
    logging.info("======== Waiting for response ========")
    results = []
    page_num = 1
    while len(results) < limit:
        # Request one page of results, merging caller parameters with paging.
        params = dict(kwargs, page_num=page_num, page_size=self._PAGE_SIZE)
        response = requests.get(url=url, params=params, headers=self._SEARCH_HEADER)
        unparsed_page = response.content
        page = ET.XML(unparsed_page)
        found_any = False
        for node in list(page):
            if node.tag == 'result':
                results.append(XmlDictConfig(node))
                found_any = True
            elif node.tag == 'error':
                raise ValueError(
                    'Bad search response: {}'.format(unparsed_page))
        # An empty page means the server has no more results to return.
        if not found_any:
            break
        page_num += 1
    return results
def doGranuleCheckwithRecords(filename, outputform='CSV', outfilename="result.csv"):
    """Run granule checks on a local XML metadata file.

    :param filename: path of the local granule metadata XML file
    :param outputform: 'JSON' returns the JSON check result of the first
        <Granule>; anything else writes one CSV row per <Granule>
    :param outfilename: CSV output path (used only in the non-JSON branch)
    :return: JSON check result in the 'JSON' branch, otherwise None
    """
    xml = ElementTree.parse(filename)
    root_element = xml.getroot()
    ck = GranuleChecker.Checker()
    if outputform == 'JSON':
        # Matches the original behavior: only the first granule is checked.
        for granule in root_element.iter('Granule'):
            metadata = XmlDictConfig(granule)
            return ck.checkAllJSON(metadata)
    else:
        # BUG FIX: the output file was opened without a context manager and
        # leaked if any check raised; `with` guarantees it is closed.
        with open(outfilename, 'w') as out_fp:
            out_fp.write(granule_output_header)
            for granule in root_element.iter('Granule'):
                metadata = XmlDictConfig(granule)
                res = ck.checkAll(metadata)
                out_fp.write(filename + ', ' + res + '\n')
def doGranuleCheckwithIDList(filename, outfilename="result.csv", tmp_path="./"):
    """Check every granule listed (one concept id per line) in a file.

    For each non-empty line: search CMR for the concept id, download the
    first hit into tmp_path, run the checks on each <Granule> element,
    append one CSV row per granule, then delete the downloaded file.

    :param filename: path of the text file with one concept id per line
    :param outfilename: CSV output path
    :param tmp_path: directory used for temporary downloads
    """
    ck = GranuleChecker.Checker()
    # Context managers replace the original unmanaged open()/close() pairs.
    with open(filename, 'r') as in_fp, open(outfilename, 'w') as out_fp:
        out_fp.write(granule_output_header)
        # BUG FIX: the original used iter(in_fp.readline, b''); in text mode
        # readline() returns '' at EOF, never b'', so the loop never ended.
        for line in in_fp:
            conceptID = line.rstrip()
            if len(conceptID) == 0:
                continue
            result = searchGranule(limit=100, concept_id=conceptID)
            result[0].download(tmp_path)
            xml = ElementTree.parse(tmp_path + conceptID)
            root_element = xml.getroot()
            for granule in root_element.iter('Granule'):
                metadata = XmlDictConfig(granule)
                out_fp.write(conceptID + ", " + ck.checkAll(metadata) + '\n')
            silentremove(tmp_path + conceptID)
def doCollectionCheckwithRecords(filename, outputform='CSV', outfilename="result.csv"):
    """Run collection checks on a local XML metadata file.

    :param filename: path of the local collection metadata XML file
    :param outputform: 'JSON' returns the JSON check result (of the last
        <Collection> seen); anything else writes a one-row CSV
    :param outfilename: CSV output path (used only in the non-JSON branch)
    :return: JSON check result in the 'JSON' branch, otherwise None
    """
    xml = ElementTree.parse(filename)
    root_element = xml.getroot()
    ck = CollectionChecker.Checker()
    result = None
    for collection in root_element.iter('Collection'):
        metadata = XmlDictConfig(collection)
        result = ck.checkAllJSON(metadata)
    if outputform == 'JSON':
        return result
    else:
        od = toOrderedDict(result)
        # BUG FIX: the file was opened in 'wb', which makes csv.DictWriter
        # raise TypeError in Python 3; csv writers want text mode with
        # newline='' per the csv module documentation.
        with open(outfilename, 'w', newline='') as f:
            w = csv.DictWriter(f, od)
            w.writeheader()
            w.writerow(od)
def doCollectionCheckwithRecords(filename, titles, UMM='1.10', outputform='CSV', outfilename="result.csv"):
    """Fetch a collection metadata record by URL and run the UMM checks.

    :param filename: URL of the collection metadata record (the collection
        id is taken from the second-to-last path component)
    :param titles: titles passed through to the collection checker
    :param UMM: UMM schema version passed to the collection checker
    :param outputform: 'JSON' returns the raw JSON check result; anything
        else also writes a one-row CSV and returns an OrderedDict
    :param outfilename: CSV output path (used only in the non-JSON branch)
    :return: JSON result, or OrderedDict of check results
    """
    print(filename)
    cid = filename.split('/')[-2]
    # Retrieve the metadata record from the API and parse it as XML.
    api = urllib.request.urlopen(filename)
    xml = ElementTree.parse(api)
    root_element = xml.getroot()
    ck = CollectionChecker.Checker(UMM)
    result = None
    # BUG FIX: `metadata` was only bound inside the loop; a record with no
    # <Collection> elements raised NameError at toOrderedDict(...) below.
    metadata = None
    for collection in root_element.iter('Collection'):
        metadata = XmlDictConfig(collection)
        result = ck.checkAllJSON(metadata, titles, cid)
    if outputform == 'JSON':
        return result
    else:
        od = toOrderedDict(result, metadata, filename, 'Collection', cid)
        # newline='' is the documented open mode for csv writers in Python 3.
        with open(outfilename, 'w', newline='') as f:
            w = csv.DictWriter(f, od)
            w.writeheader()
            w.writerow(od)
        return od
def doCollectionCheckwithShortNameList(filename, outfilename="result.csv", tmp_path="./"):
    """Check every collection listed (one short name per line) in a file.

    For each non-empty line: search CMR for the short name, download the
    first hit into tmp_path, run the checks on each <Collection> element,
    append one CSV row per collection, then delete the downloaded file.
    The CSV header is written once, from the first result's keys.

    :param filename: path of the text file with one short name per line
    :param outfilename: CSV output path
    :param tmp_path: directory used for temporary downloads
    """
    ck = CollectionChecker.Checker()
    header_written = False
    w = None
    # BUG FIX: the output was opened in "wb", which makes csv.DictWriter
    # raise TypeError in Python 3; csv wants text mode with newline=''.
    with open(filename, 'r') as in_fp, open(outfilename, 'w', newline='') as out_fp:
        # BUG FIX: the original used iter(in_fp.readline, b''); in text mode
        # readline() returns '' at EOF, never b'', so the loop never ended.
        for line in in_fp:
            shortName = line.rstrip()
            if len(shortName) == 0:
                continue
            result = searchCollection(limit=100, short_name=shortName)
            result[0].download(tmp_path)
            # Slashes are stripped so the short name is a valid filename.
            local_name = tmp_path + shortName.replace('/', '')
            xml = ElementTree.parse(local_name)
            root_element = xml.getroot()
            for collection in root_element.iter('Collection'):
                metadata = XmlDictConfig(collection)
                result = ck.checkAll(metadata)
                od = toOrderedDict(result)
                if not header_written:
                    w = csv.DictWriter(out_fp, od)
                    w.writeheader()
                    header_written = True
                w.writerow(od)
            silentremove(local_name)