Example #1
0
def run_ingest(config, xmlfile):
    """Parse a downloaded mandi-prices SOAP/XML file and hand its rows to MongoDB.

    Every ``Table`` element found under
    ``soap:Envelope/soap:Body/showResponse/showResult/diffgr:diffgram/NewDataSet``
    is flattened into a dict with the keys ``arrival_date``, ``district``,
    ``market``, ``max_price``, ``min_price``, ``modal_price``, ``state``,
    ``variety`` and ``commodity``.  Rows that have neither a ``Commodity`` nor
    a ``Column1`` element are logged and skipped.

    The collected rows are then passed to ``MongoHelper``:
    ``rename_collection("mandi_prices")`` followed by
    ``save("mandi_prices", docs=...)``.
    NOTE(review): presumably the rename moves the previous collection out of
    the way before the fresh save -- confirm against ``MongoHelper``.

    Args:
        config: Passed unchanged to ``MongoHelper``.
        xmlfile: Path of the XML file to ingest.

    Raises:
        KeyError: If the expected SOAP structure or a mandatory row field is
            missing.
    """
    with open(xmlfile, 'rb') as f:
        xmlstr = f.read()

    xmldict = xmltodict.parse(xmlstr)
    tables = xmldict["soap:Envelope"]["soap:Body"]["showResponse"]["showResult"]["diffgr:diffgram"]["NewDataSet"]["Table"]
    # xmltodict only builds a list for repeated elements: a response holding a
    # single <Table> comes back as one mapping, and iterating that would walk
    # its keys instead of its rows.
    if not isinstance(tables, list):
        tables = [tables]

    data = []
    for t in tables:
        obj = {
            "arrival_date": t["Arrival_Date"],
            "district": t["District"],
            "market": t["Market"],
            "max_price": t["Max_x0020_Price"],
            "min_price": t["Min_x0020_Price"],
            "modal_price": t["Modal_x0020_Price"],
            "state": t["State"],
            "variety": t["Variety"]
        }

        # Some responses carry the commodity name under "Column1" instead.
        if "Commodity" in t:
            obj["commodity"] = t["Commodity"]
        elif "Column1" in t:
            obj["commodity"] = t["Column1"]
        else:
            # .get() so that a row without an id cannot crash the warning path.
            logger.warning("Commodity not found in %s", t.get("@diffgr:id"))
            continue

        logger.debug("Inserted %s %s %s", obj["commodity"], obj["market"], obj["arrival_date"])
        data.append(obj)

    mongo_helper = MongoHelper(config)
    mongo_helper.rename_collection("mandi_prices")
    mongo_helper.save("mandi_prices", docs=data)
Example #2
0
    def __cics_geocode(self, state, district, market):
        """Look up *market* with the CSIS (u-tokyo.ac.jp) census geocoder.

        ``state`` and ``district`` are matched case-insensitively against the
        ``name`` fields in ``self.states``; the ids of the matches (if any) are
        sent as the ``f`` filter next to the query text ``q``.

        Args:
            state: State name used to select the state filter.
            district: District name, only searched inside the matched state.
            market: Free-text market name sent as the query.

        Returns:
            The ``marker`` entry of the parsed response, or ``None`` when the
            response has no ``markers``/``marker`` element.
        """
        # urlencode lives in urllib.parse on Python 3 and in urllib on
        # Python 2; importing it here keeps the method usable on both.
        try:
            from urllib.parse import urlencode
        except ImportError:
            from urllib import urlencode

        f_str_state = ""
        f_str_district = ""
        state_name = state.lower()
        district_name = district.lower()
        for st in self.states:
            if st['name'].lower() != state_name:
                continue
            f_str_state = '"state":"' + st['id'] + '"'
            for di in st['districts']:
                if di['name'].lower() == district_name:
                    f_str_district = ', "district":"' + di['id'] + '"'
                    break
            # Stop at the first matching state so the district filter always
            # belongs to the state that was selected.
            break

        f_param = "{" + f_str_state + f_str_district + "}"
        data = urlencode({"q": market, "f": f_param})
        req = requests.post(
            "http://india.csis.u-tokyo.ac.jp/geocode-cgi/census_ajax_json.cgi",
            data=data)
        logger.debug("cics request for %s %s %s" % (state, district, market))

        # NOTE(review): the endpoint name says "json" but the body is parsed
        # as XML here -- confirm the actual response format.
        xmldict = xmltodict.parse(req.text)
        if "markers" in xmldict:
            results = xmldict["markers"]
            if results and "marker" in results:
                return results["marker"]
        return None
Example #3
0
    def check_mandi_locations(self):
        """Make sure every market in ``mandi_prices`` has a ``mandi_locations`` entry.

        For each distinct lower-cased (state, district, market) found in the
        ``mandi_prices`` collection:

        * a ``mandi_locations`` document is inserted if none matches;
        * if the document has no ``cics_geocode`` field, the CSIS geocoder is
          queried and a non-empty result is stored under that field;
        * likewise for ``nm_geocode`` and the Nominatim geocoder.
        """
        logger.debug("Check mandi locations")
        mandi_prices = self.mongo_helper.db["mandi_prices"]
        mandi_locations = self.mongo_helper.db["mandi_locations"]

        # Many price rows share one market.  Handling each location once per
        # run avoids repeating the lookup -- and, when a geocoder returned
        # nothing, the same HTTP request -- for every one of those rows.
        seen = set()

        # NOTE(review): if these are PyMongo collections, insert()/update()
        # are the legacy API (removed in PyMongo 4); move to
        # insert_one()/update_one() once the installed version is confirmed.
        for mp in mandi_prices.find():
            state = mp["state"].lower()
            district = mp["district"].lower()
            market = mp["market"].lower()

            key = (state, district, market)
            if key in seen:
                continue
            seen.add(key)

            query = {"state": state, "district": district, "market": market}
            doc = mandi_locations.find_one(query)

            if doc is None:
                doc = {
                    "state": state, "district": district, "market": market
                }
                doc["_id"] = mandi_locations.insert(doc)
                # Logged only when a document was really inserted.
                logger.debug("Inserted new mandi location for %s %s %s" % (state, district, market))

            if "cics_geocode" not in doc:
                cics_data = self.__cics_geocode(state, district, market)
                if cics_data:
                    mandi_locations.update({"_id": doc["_id"]}, {"$set": {"cics_geocode": cics_data}})

            if "nm_geocode" not in doc:
                nm_data = self.__nominatim_geocode(state, district, market)
                if nm_data:
                    mandi_locations.update({"_id": doc["_id"]}, {"$set": {"nm_geocode": nm_data}})
Example #4
0
    def __nominatim_geocode(self, state, district, market):
        """Geocode a market through the Nominatim search endpoint.

        Sends ``"<market>, <district>, <state>"`` as the ``q`` parameter,
        together with ``format=json``, to ``NOMINATIM["api_url"]``.

        Returns:
            The decoded JSON body of the response.
        """
        search_text = "%s, %s, %s" % (market, district, state)
        response = requests.get(
            NOMINATIM["api_url"],
            params={"format": "json", "q": search_text})
        logger.debug("nm request for %s %s %s" % (state, district, market))
        return response.json()
Example #5
0
    def __nominatim_geocode(self, state, district, market):
        """Ask Nominatim for the location of a market.

        The free-text query is ``"<market>, <district>, <state>"``; it is sent
        as ``q`` with ``format=json`` to ``NOMINATIM["api_url"]``.

        Returns:
            Whatever the response body decodes to as JSON.
        """
        place = "%s, %s, %s" % (market, district, state)
        search_params = dict(format="json", q=place)
        reply = requests.get(NOMINATIM["api_url"], params=search_params)
        logger.debug("nm request for %s %s %s" % (state, district, market))
        return reply.json()
Example #6
0
def download_file():
    """Download today's mandi-prices XML into ``data_dir``.

    The target file is named ``<url basename without extension>_<yy-mm-dd>.xml``
    using the current local date.

    Returns:
        The path of the newly written file, or ``False`` when a file for
        today already exists (nothing is downloaded in that case).

    Raises:
        Exception: Whatever ``requests`` raises for a failed request or an
            HTTP error status; it is logged as critical and re-raised.
    """
    dl_url = OGD["mandi_prices_xml_url"]
    dl_file_name = os.path.join(data_dir,
            "%s_%s.xml" % (os.path.splitext(os.path.basename(dl_url))[0], datetime.now().strftime("%y-%m-%d")))

    # Check first: there is no point in fetching a file that would be thrown
    # away.  The existing file is left untouched.
    if os.path.exists(dl_file_name):
        logger.debug("File %s already exists, skipping download" % dl_file_name)
        return False

    try:
        req = requests.get(dl_url)
        # Do not store an HTTP error page as if it were the data file.
        req.raise_for_status()
    except Exception:
        logger.critical("Error while downloading %s" % dl_url)
        raise

    # Write the raw bytes: the ingest step reads the file in binary mode, and
    # text mode would re-encode the payload with a platform-dependent codec.
    with open(dl_file_name, "wb") as dlf:
        dlf.write(req.content)

    return dl_file_name
Example #7
0
def download_file():
    """Fetch the mandi-prices XML for today and store it under ``data_dir``.

    The file name is built from the URL's base name (extension stripped) and
    the current local date formatted as ``yy-mm-dd``, with an ``.xml`` suffix.

    Returns:
        The path of the file that was written, or ``False`` if today's file
        is already present, in which case no request is made.

    Raises:
        Exception: Any error raised by ``requests`` while fetching the URL,
            including an HTTP error status; logged as critical, then re-raised.
    """
    dl_url = OGD["mandi_prices_xml_url"]
    dl_file_name = os.path.join(
        data_dir, "%s_%s.xml" % (os.path.splitext(
            os.path.basename(dl_url))[0], datetime.now().strftime("%y-%m-%d")))

    # Nothing is deleted or overwritten: an existing file short-circuits the
    # download entirely.
    if os.path.exists(dl_file_name):
        logger.debug("File %s already exists, skipping download" % dl_file_name)
        return False

    try:
        req = requests.get(dl_url)
        # A 4xx/5xx answer must not end up on disk as today's data.
        req.raise_for_status()
    except Exception:
        logger.critical("Error while downloading %s" % dl_url)
        raise

    # Binary write keeps the payload byte-for-byte as served; text mode could
    # fail or re-encode it depending on the platform's default codec.
    with open(dl_file_name, "wb") as dlf:
        dlf.write(req.content)

    return dl_file_name
Example #8
0
    def check_mandi_locations(self):
        """Create and geocode ``mandi_locations`` entries for all known markets.

        Walks the ``mandi_prices`` collection and, for every distinct
        lower-cased (state, district, market) combination:

        * inserts a ``mandi_locations`` document when none matches;
        * fills ``cics_geocode`` from the CSIS geocoder if the field is
          missing and the geocoder returns a non-empty result;
        * fills ``nm_geocode`` from Nominatim under the same conditions.
        """
        logger.debug("Check mandi locations")
        mandi_prices = self.mongo_helper.db["mandi_prices"]
        mandi_locations = self.mongo_helper.db["mandi_locations"]

        # Price rows repeat the same market many times; remembering what was
        # already handled in this run prevents redundant database lookups and
        # repeated geocoder requests for locations that cannot be resolved.
        handled = set()

        # NOTE(review): if these are PyMongo collections, insert()/update()
        # are the legacy API (removed in PyMongo 4); move to
        # insert_one()/update_one() once the installed version is confirmed.
        cursor = mandi_prices.find()
        for mp in cursor:
            state = mp["state"].lower()
            district = mp["district"].lower()
            market = mp["market"].lower()

            location_key = (state, district, market)
            if location_key in handled:
                continue
            handled.add(location_key)

            query = {"state": state, "district": district, "market": market}
            doc = mandi_locations.find_one(query)

            if doc is None:
                doc = {"state": state, "district": district, "market": market}
                doc["_id"] = mandi_locations.insert(doc)
                # Only report an insert when one actually happened.
                logger.debug("Inserted new mandi location for %s %s %s" %
                             (state, district, market))

            if "cics_geocode" not in doc:
                cics_data = self.__cics_geocode(state, district, market)
                if cics_data:
                    mandi_locations.update(
                        {"_id": doc["_id"]},
                        {"$set": {
                            "cics_geocode": cics_data
                        }})

            if "nm_geocode" not in doc:
                nm_data = self.__nominatim_geocode(state, district, market)
                if nm_data:
                    mandi_locations.update({"_id": doc["_id"]},
                                           {"$set": {
                                               "nm_geocode": nm_data
                                           }})
Example #9
0
def run_ingest(config, xmlfile):
    """Load mandi price rows from a SOAP/XML dump and pass them to MongoDB.

    The ``Table`` elements under
    ``soap:Envelope/soap:Body/showResponse/showResult/diffgr:diffgram/NewDataSet``
    are converted to dicts keyed ``arrival_date``, ``district``, ``market``,
    ``max_price``, ``min_price``, ``modal_price``, ``state``, ``variety`` and
    ``commodity``.  A row with neither ``Commodity`` nor ``Column1`` is
    logged and left out.

    Afterwards ``MongoHelper.rename_collection("mandi_prices")`` and
    ``MongoHelper.save("mandi_prices", docs=...)`` are called in that order.
    NOTE(review): presumably the rename preserves the previous data set
    before the new one is saved -- confirm against ``MongoHelper``.

    Args:
        config: Forwarded as-is to ``MongoHelper``.
        xmlfile: Path of the XML file to read.

    Raises:
        KeyError: If the SOAP structure or a mandatory row field is missing.
    """
    with open(xmlfile, 'rb') as f:
        xmlstr = f.read()

    xmldict = xmltodict.parse(xmlstr)
    tables = xmldict["soap:Envelope"]["soap:Body"]["showResponse"][
        "showResult"]["diffgr:diffgram"]["NewDataSet"]["Table"]
    # A lone <Table> element is parsed into a single mapping rather than a
    # one-element list; normalise so the loop below always sees rows.
    if not isinstance(tables, list):
        tables = [tables]

    data = []
    for t in tables:
        obj = {
            "arrival_date": t["Arrival_Date"],
            "district": t["District"],
            "market": t["Market"],
            "max_price": t["Max_x0020_Price"],
            "min_price": t["Min_x0020_Price"],
            "modal_price": t["Modal_x0020_Price"],
            "state": t["State"],
            "variety": t["Variety"]
        }

        # The commodity name is sometimes delivered as "Column1".
        if "Commodity" in t:
            obj["commodity"] = t["Commodity"]
        elif "Column1" in t:
            obj["commodity"] = t["Column1"]
        else:
            # .get() keeps a missing row id from raising inside the warning.
            logger.warning("Commodity not found in %s", t.get("@diffgr:id"))
            continue

        logger.debug("Inserted %s %s %s",
                     obj["commodity"], obj["market"], obj["arrival_date"])
        data.append(obj)

    mongo_helper = MongoHelper(config)
    mongo_helper.rename_collection("mandi_prices")
    mongo_helper.save("mandi_prices", docs=data)
Example #10
0
    def __cics_geocode(self, state, district, market):
        """Query the CSIS (u-tokyo.ac.jp) census geocoder for *market*.

        The names in ``self.states`` are compared case-insensitively with
        ``state`` and ``district``; the ids of the matching entries, when
        found, form the ``f`` filter that accompanies the query text ``q``.

        Args:
            state: State name used to pick the state filter.
            district: District name, looked up only within the matched state.
            market: Free-text market name to geocode.

        Returns:
            The ``marker`` entry of the parsed response, or ``None`` if the
            response contains no ``markers``/``marker`` element.
        """
        # Python 3 moved urlencode to urllib.parse; fall back to the Python 2
        # location so the method runs on either interpreter.
        try:
            from urllib.parse import urlencode
        except ImportError:
            from urllib import urlencode

        f_str_state = ""
        f_str_district = ""
        wanted_state = state.lower()
        wanted_district = district.lower()
        for st in self.states:
            if st['name'].lower() != wanted_state:
                continue
            f_str_state = '"state":"' + st['id'] + '"'
            for di in st['districts']:
                if di['name'].lower() == wanted_district:
                    f_str_district = ', "district":"' + di['id'] + '"'
                    break
            # First match wins; continuing could pair a later state id with a
            # district id taken from an earlier one.
            break

        f_param = "{" + f_str_state + f_str_district + "}"
        data = urlencode({"q": market, "f": f_param})
        req = requests.post("http://india.csis.u-tokyo.ac.jp/geocode-cgi/census_ajax_json.cgi", data=data)
        logger.debug("cics request for %s %s %s" % (state, district, market))

        # NOTE(review): the endpoint name says "json" but the body is parsed
        # as XML here -- confirm the actual response format.
        xmldict = xmltodict.parse(req.text)
        if "markers" in xmldict:
            results = xmldict["markers"]
            if results and "marker" in results:
                return results["marker"]
        return None