Exemple #1
0
def doGranuleCheckwithRecords(filename,
                              UMM='1.10',
                              outputform='CSV',
                              outfilename="result.csv"):
    """Fetch a granule metadata record and run every granule check on it.

    :param filename: location of the metadata record. Despite the name it is
        opened with ``urllib.request.urlopen``, so it must be a URL; its
        second-to-last ``/``-separated component is used as the granule id.
    :param UMM: value handed to ``GranuleChecker.Checker``.
    :param outputform: not used by this function; kept so existing callers
        that pass it keep working.
    :param outfilename: not used by this function; kept so existing callers
        that pass it keep working.
    :return: whatever ``toOrderedDict`` builds from the check result of the
        last ``Granule`` element in the record.
    :raises ValueError: if the record contains no ``Granule`` element.
    """
    print(filename)
    gid = filename.split('/')[-2]

    # Close the HTTP response as soon as the document has been parsed.
    with urllib.request.urlopen(filename) as api:
        root_element = ElementTree.parse(api).getroot()

    ck = GranuleChecker.Checker(UMM)

    result = None
    metadata = None
    # When several Granule elements are present only the last one is reported.
    for granule in root_element.iter('Granule'):
        metadata = XmlDictConfig(granule)
        result = ck.checkAllJSON(metadata)

    if metadata is None:
        # Without this guard the return below fails with an opaque
        # UnboundLocalError.
        raise ValueError('No Granule element found in {}'.format(filename))

    return toOrderedDict(result, metadata, filename, 'Granule', gid)
Exemple #2
0
    def _get_search_results(self, url, limit, **kwargs):
        """
        Search the CMR, requesting successive pages until enough results
        have been collected or a page comes back without any result.

        :param url: search endpoint the GET requests are sent to
        :param limit: maximum number of results to return
        :param kwargs: search parameters, sent as query parameters
        :return: list of at most ``limit`` results (<Instance of XmlDictConfig>)
        :raises ValueError: if a response page contains an ``error`` element
        """
        logging.info("======== Waiting for response ========")

        page_num = 1
        results = []
        while len(results) < limit:
            response = requests.get(url=url,
                                    params=dict(kwargs,
                                                page_num=page_num,
                                                page_size=self._PAGE_SIZE),
                                    headers=self._SEARCH_HEADER)
            unparsed_page = response.content
            page = ET.XML(unparsed_page)

            empty_page = True
            for child in page:
                if child.tag == 'result':
                    results.append(XmlDictConfig(child))
                    empty_page = False
                elif child.tag == 'error':
                    raise ValueError(
                        'Bad search response: {}'.format(unparsed_page))

            if empty_page:
                # No more matches on the server side.
                break
            page_num += 1

        # Whole pages are appended, so the last page can overshoot ``limit``.
        return results[:limit]
Exemple #3
0
def doGranuleCheckwithRecords(filename,
                              outputform='CSV',
                              outfilename="result.csv"):
    """Run the granule checks on a metadata record stored in a local file.

    :param filename: path of the XML metadata file to parse.
    :param outputform: ``'JSON'`` returns the check result; any other value
        writes a CSV file instead.
    :param outfilename: path of the CSV file written in CSV mode.
    :return: in JSON mode, the ``checkAllJSON`` result of the first
        ``Granule`` element (``None`` if there is none); in CSV mode ``None``.
    """
    root_element = ElementTree.parse(filename).getroot()
    ck = GranuleChecker.Checker()

    if outputform == 'JSON':
        for granule in root_element.iter('Granule'):
            return ck.checkAllJSON(XmlDictConfig(granule))
        return None

    # The context manager guarantees the file is closed even if a check raises.
    with open(outfilename, 'w') as out_fp:
        out_fp.write(granule_output_header)

        found = False
        res = None
        # One row per input file: when the file holds several Granule
        # elements only the result of the last one is written.
        for granule in root_element.iter('Granule'):
            res = ck.checkAll(XmlDictConfig(granule))
            found = True

        # With no Granule element there is nothing to report; emit the
        # header only instead of failing on an unbound ``res``.
        if found:
            out_fp.write(filename + ', ' + res + '\n')
Exemple #4
0
def doGranuleCheckwithIDList(filename,
                             outfilename="result.csv",
                             tmp_path="./"):
    """Check every granule whose concept id is listed in a text file.

    For each non-blank line the granule is looked up with ``searchGranule``,
    the first hit is downloaded into ``tmp_path``, checked, and the
    downloaded copy is removed again.

    :param filename: text file with one concept id per line; blank lines are
        skipped.
    :param outfilename: CSV file to write; it starts with
        ``granule_output_header`` followed by one row per ``Granule`` element.
    :param tmp_path: prefix for the downloaded record; the concept id is
        appended to it directly, so it should end with a path separator.
    :raises IndexError: if the search for a concept id returns no result.
    """
    with open(filename, 'r') as in_fp, open(outfilename, 'w') as out_fp:
        out_fp.write(granule_output_header)
        ck = GranuleChecker.Checker()

        # Iterate the file directly: the former ``iter(readline, b'')``
        # sentinel never matches the ``''`` a text-mode file returns at EOF
        # on Python 3, so the loop never ended.
        for line in in_fp:
            conceptID = line.rstrip()
            if not conceptID:
                continue

            result = searchGranule(limit=100, concept_id=conceptID)
            result[0].download(tmp_path)
            local_copy = tmp_path + conceptID
            try:
                root_element = ElementTree.parse(local_copy).getroot()
                for granule in root_element.iter('Granule'):
                    metadata = XmlDictConfig(granule)
                    out_fp.write(
                        conceptID + ", " + ck.checkAll(metadata) + '\n')
            finally:
                # Remove the downloaded copy even if parsing/checking failed.
                silentremove(local_copy)
Exemple #5
0
def doCollectionCheckwithRecords(filename,
                                 outputform='CSV',
                                 outfilename="result.csv"):
    """Run the collection checks on a metadata record stored in a local file.

    :param filename: path of the XML metadata file to parse.
    :param outputform: ``'JSON'`` returns the check result; any other value
        writes it to a CSV file instead.
    :param outfilename: path of the CSV file written in CSV mode.
    :return: in JSON mode, the ``checkAllJSON`` result of the last
        ``Collection`` element (``None`` if there is none); in CSV mode
        ``None``.
    """
    root_element = ElementTree.parse(filename).getroot()
    ck = CollectionChecker.Checker()

    result = None
    # When several Collection elements are present only the last is reported.
    for collection in root_element.iter('Collection'):
        result = ck.checkAllJSON(XmlDictConfig(collection))

    if outputform == 'JSON':
        return result

    od = toOrderedDict(result)
    # Text mode: csv.DictWriter writes str, which a 'wb' file rejects on
    # Python 3.
    with open(outfilename, 'w') as f:
        w = csv.DictWriter(f, od)
        w.writeheader()
        w.writerow(od)
Exemple #6
0
def doCollectionCheckwithRecords(filename,
                                 titles,
                                 UMM='1.10',
                                 outputform='CSV',
                                 outfilename="result.csv"):
    """Fetch a collection metadata record and run every collection check.

    :param filename: location of the metadata record. Despite the name it is
        opened with ``urllib.request.urlopen``, so it must be a URL; its
        second-to-last ``/``-separated component is used as the collection
        id.
    :param titles: passed through unchanged to ``checkAllJSON``.
    :param UMM: value handed to ``CollectionChecker.Checker``.
    :param outputform: ``'JSON'`` returns the raw check result; any other
        value writes a one-row CSV file and returns the row.
    :param outfilename: path of the CSV file written in CSV mode.
    :return: in JSON mode, the ``checkAllJSON`` result of the last
        ``Collection`` element (``None`` if there is none); in CSV mode, the
        mapping built by ``toOrderedDict`` that was written to the file.
    :raises ValueError: in CSV mode, if the record contains no ``Collection``
        element.
    """
    print(filename)
    cid = filename.split('/')[-2]

    # Close the HTTP response as soon as the document has been parsed.
    with urllib.request.urlopen(filename) as api:
        root_element = ElementTree.parse(api).getroot()

    ck = CollectionChecker.Checker(UMM)
    result = None
    metadata = None
    # When several Collection elements are present only the last is reported.
    for collection in root_element.iter('Collection'):
        metadata = XmlDictConfig(collection)
        result = ck.checkAllJSON(metadata, titles, cid)

    if outputform == 'JSON':
        return result

    if metadata is None:
        # Without this guard the call below fails with an opaque
        # UnboundLocalError.
        raise ValueError('No Collection element found in {}'.format(filename))

    od = toOrderedDict(result, metadata, filename, 'Collection', cid)
    with open(outfilename, 'w') as f:
        w = csv.DictWriter(f, od)
        w.writeheader()
        w.writerow(od)
    return od
Exemple #7
0
def doCollectionCheckwithShortNameList(filename,
                                       outfilename="result.csv",
                                       tmp_path="./"):
    """Check every collection whose short name is listed in a text file.

    For each non-blank line the collection is looked up with
    ``searchCollection``, the first hit is downloaded into ``tmp_path``,
    checked, and the downloaded copy is removed again.

    :param filename: text file with one short name per line; blank lines are
        skipped.
    :param outfilename: CSV file to write. The header is taken from the keys
        of the first check result; one row is written per ``Collection``
        element.
    :param tmp_path: prefix for the downloaded record; the short name (with
        ``/`` removed) is appended to it directly, so it should end with a
        path separator.
    :raises IndexError: if the search for a short name returns no result.
    """
    # Text mode for the output: csv.DictWriter writes str, which a 'wb' file
    # rejects on Python 3.
    with open(filename, 'r') as in_fp, open(outfilename, 'w') as out_fp:
        ck = CollectionChecker.Checker()
        w = None  # created lazily, once the first row's keys are known

        # Iterate the file directly: the former ``iter(readline, b'')``
        # sentinel never matches the ``''`` a text-mode file returns at EOF
        # on Python 3, so the loop never ended.
        for line in in_fp:
            shortName = line.rstrip()
            if not shortName:
                continue

            hits = searchCollection(limit=100, short_name=shortName)
            hits[0].download(tmp_path)
            local_copy = tmp_path + shortName.replace('/', '')
            try:
                root_element = ElementTree.parse(local_copy).getroot()
                for collection in root_element.iter('Collection'):
                    metadata = XmlDictConfig(collection)
                    od = toOrderedDict(ck.checkAll(metadata))
                    if w is None:
                        w = csv.DictWriter(out_fp, od)
                        w.writeheader()
                    w.writerow(od)
            finally:
                # Remove the downloaded copy even if parsing/checking failed.
                silentremove(local_copy)