def generate_rows(self, dataset_schema=None, dataset_partitioning=None, partition_id=None, records_limit=-1):
    """Yield one row per item returned for every page listed in the config.

    The "pages" config value is read as newline-separated entries of the
    form ``<project> <page title>``: the first whitespace-delimited token
    is the project, the rest of the line is the page.  Entries that are
    empty after stripping are skipped so that no query is issued with an
    empty project and page.

    ``dataset_schema``, ``dataset_partitioning``, ``partition_id`` and
    ``records_limit`` are accepted but not used by this implementation.

    Yields:
        dict with the keys "project", "date", "page" and "views".
    """
    beg_date, end_date = dkuwikipedia.get_daterange(self.config)

    # ``or ""`` also covers a "pages" key that is present but set to None.
    pages_text = self.config.get("pages") or ""
    for raw_line in pages_text.split("\n"):
        entry = raw_line.strip()
        if not entry:
            # "".split("\n") yields [""], and users may leave blank lines
            # between entries; neither describes a page to query.
            continue

        # Split once on any whitespace run so that tabs or repeated spaces
        # between project and title do not leak into the page title.
        parts = entry.split(None, 1)
        project = parts[0]
        page = parts[1] if len(parts) > 1 else ""

        # Parenthesised single-argument form prints the same text on
        # Python 2 and parses on Python 3.
        print("Query for %s : %s" % (project, page))
        resp = dkuwikipedia.query_page(project, page, beg_date, end_date)
        payload = resp.json()
        # A response without an "items" key contributes no rows.
        for entry_stats in payload.get("items", []):
            yield {
                "project": project,
                "date": dkuwikipedia.parse_and_format_yyyymmddhh(
                    entry_stats["timestamp"]),
                "page": entry_stats["article"],
                "views": entry_stats["views"],
            }
def get_rows():
    """Yield one row per item returned for every page in the pages dataset.

    Iterates ``pages_list_dataset.iter_rows()``, reading the "project" and
    "page" fields of each row, and queries ``dkuwikipedia.query_page`` for
    the ``beg_date``..``end_date`` range taken from the enclosing scope.

    Yields:
        dict with the keys "project", "date", "page" and "views".
    """
    for page_row in pages_list_dataset.iter_rows():
        project = page_row["project"]
        page = page_row["page"]

        # Parenthesised single-argument form prints the same text on
        # Python 2 and parses on Python 3.
        print("Query for %s : %s" % (project, page))
        resp = dkuwikipedia.query_page(project, page, beg_date, end_date)
        payload = resp.json()
        # Distinct name for the inner loop variable: the original reused
        # ``item`` and shadowed the dataset row being processed.
        for entry_stats in payload.get("items", []):
            yield {
                "project": project,
                "date": dkuwikipedia.parse_and_format_yyyymmddhh(
                    entry_stats["timestamp"]),
                "page": entry_stats["article"],
                "views": entry_stats["views"],
            }
def get_rows():
    """Yield one row per item returned for every page in the pages dataset.

    Each row of ``pages_list_dataset.iter_rows()`` supplies a "project" and
    a "page"; ``dkuwikipedia.query_page`` is called for that pair over the
    ``beg_date``..``end_date`` range taken from the enclosing scope.

    Yields:
        dict with the keys "project", "date", "page" and "views".
    """
    for page_row in pages_list_dataset.iter_rows():
        project = page_row["project"]
        page = page_row["page"]

        # Function-call form of print: identical output on Python 2 for a
        # single argument, and valid syntax on Python 3.
        print("Query for %s : %s" % (project, page))
        resp = dkuwikipedia.query_page(project, page, beg_date, end_date)
        payload = resp.json()
        # The inner loop uses its own name instead of re-binding ``item``,
        # which the original shared with the outer dataset row.
        for entry_stats in payload.get("items", []):
            yield {
                "project": project,
                "date": dkuwikipedia.parse_and_format_yyyymmddhh(
                    entry_stats["timestamp"]),
                "page": entry_stats["article"],
                "views": entry_stats["views"],
            }
def generate_rows(self, dataset_schema=None, dataset_partitioning=None, partition_id=None, records_limit=-1):
    """Yield one row per item returned for every page listed in the config.

    The "pages" config value is read as newline-separated entries of the
    form ``<project> <page title>``: the first whitespace-delimited token
    is the project, the rest of the line is the page.  Entries that are
    empty after stripping are skipped so that no query is issued with an
    empty project and page.

    ``dataset_schema``, ``dataset_partitioning``, ``partition_id`` and
    ``records_limit`` are accepted but not used by this implementation.

    Yields:
        dict with the keys "project", "date", "page" and "views".
    """
    beg_date, end_date = dkuwikipedia.get_daterange(self.config)

    # ``or ""`` also covers a "pages" key that is present but set to None.
    pages_text = self.config.get("pages") or ""
    for raw_line in pages_text.split("\n"):
        entry = raw_line.strip()
        if not entry:
            # "".split("\n") yields [""], and users may leave blank lines
            # between entries; neither describes a page to query.
            continue

        # Split once on any whitespace run so that tabs or repeated spaces
        # between project and title do not leak into the page title.
        parts = entry.split(None, 1)
        project = parts[0]
        page = parts[1] if len(parts) > 1 else ""

        # Parenthesised single-argument form prints the same text on
        # Python 2 and parses on Python 3.
        print("Query for %s : %s" % (project, page))
        resp = dkuwikipedia.query_page(project, page, beg_date, end_date)
        payload = resp.json()
        # A response without an "items" key contributes no rows.
        for entry_stats in payload.get("items", []):
            yield {
                "project": project,
                "date": dkuwikipedia.parse_and_format_yyyymmddhh(
                    entry_stats["timestamp"]),
                "page": entry_stats["article"],
                "views": entry_stats["views"],
            }