Ejemplo n.º 1
0
    def __call__(self, input_file_path, output_file_path, limit=5):
        """Build question records for the input URLs and dump them as JSON.

        For each processed item of the JSON list stored at
        ``input_file_path``, the question is fetched with
        ``self.getQuestionFromURL``, combined with the item's ``url`` and
        ``answer_entity_ids``, and passed through ``self.convert``; whatever
        ``convert`` returns is appended to the output list.  Items that raise
        are reported on stdout and skipped, so one bad URL does not abort the
        run.

        Args:
            input_file_path: JSON file holding a list of items, each with
                ``url`` and ``answer_entity_ids`` keys.
            output_file_path: Destination passed to ``common.dumpJSON``.
            limit: Upper slice bound applied to the input list; ``None``
                processes every item.  Defaults to 5, the cap that was
                previously hard-coded.
        """
        input_data = common.loadJSON(input_file_path)[:limit]
        output_data = []

        # The context manager closes the bar even if something unexpected
        # escapes the loop.
        with tqdm.tqdm(total=len(input_data)) as bar:
            for input_item in input_data:
                try:
                    output_item = {
                        "question": self.getQuestionFromURL(input_item["url"]),
                        "url": input_item["url"],
                        "answer_entity_ids": input_item["answer_entity_ids"],
                    }
                    # `+=` extends the list with every element convert() yields.
                    output_data += self.convert(output_item)
                except Exception as e:
                    # Look the URL up defensively: if the failure was a
                    # malformed item (e.g. no "url" key), indexing it again
                    # here would raise from inside the handler and lose all
                    # results collected so far.
                    url = (input_item.get("url")
                           if isinstance(input_item, dict) else None)
                    print("Exception: %s on url %s" % (str(e), url))

                bar.update()

        common.dumpJSON(output_data, output_file_path)
Ejemplo n.º 2
0
def generate(input_dir_path, output_file_path):
    """Merge every entity JSON file under a directory into one grouped JSON.

    All ``*.json`` files below ``input_dir_path`` (searched recursively) are
    loaded.  Each entity is stored under two keys: first the part of its
    ``id`` before the first underscore, then the full ``id``.  The stored
    record keeps the id and name, exposes ``properties`` as ``categories``
    and packs latitude/longitude into a two-element ``location`` list.

    Args:
        input_dir_path: Directory to scan; must support the ``/`` operator
            (it is joined with the glob pattern that way).
        output_file_path: Destination passed to ``common.dumpJSON``.
    """
    pattern = str(input_dir_path / "**/*.json")
    grouped = defaultdict(dict)

    for json_path in glob.glob(pattern, recursive=True):
        entity = common.loadJSON(json_path)
        entity_id = entity["id"]
        record = {
            "id": entity_id,
            "name": entity["name"],
            "categories": entity["properties"],
            "location": [entity["latitude"], entity["longitude"]],
        }
        # Group key is the id prefix up to the first underscore.
        grouped[entity_id.split("_")[0]][entity_id] = record

    common.dumpJSON(grouped, output_file_path)
Ejemplo n.º 3
0
    def __call__(self, input_file_path, output_file_path, cities_file_path,
                 limit=5):
        """Fetch the post behind each input URL, tag its city, dump as JSON.

        For each processed item of the JSON list stored at
        ``input_file_path``, the post is fetched with ``self.getPostFromURL``
        and its ``city`` field is set from the cities data, indexed by the
        integer prefix (text before the first underscore) of the item's first
        ``answer_entity_ids`` entry.  Items that raise are reported on stdout
        and skipped.

        Args:
            input_file_path: JSON file holding a list of items, each with
                ``url`` and ``answer_entity_ids`` keys.
            output_file_path: Destination passed to ``common.dumpJSON``.
            cities_file_path: JSON file with the cities lookup, indexed by
                an ``int``.
            limit: Upper slice bound applied to the input list; ``None``
                processes every item.  Defaults to 5, the cap that was
                previously hard-coded.
        """
        cities = common.loadJSON(cities_file_path)
        input_data = common.loadJSON(input_file_path)[:limit]

        output_data = []
        # The context manager closes the bar even if something unexpected
        # escapes the loop.
        with tqdm.tqdm(total=len(input_data)) as bar:
            for input_item in input_data:
                try:
                    post = self.getPostFromURL(input_item["url"])
                    city_index = int(
                        input_item["answer_entity_ids"][0].split("_")[0])
                    post["city"] = cities[city_index]
                    output_data.append(post)
                except Exception as e:
                    # Look the URL up defensively: if the failure was a
                    # malformed item (e.g. no "url" key), indexing it again
                    # here would raise from inside the handler and lose all
                    # results collected so far.
                    url = (input_item.get("url")
                           if isinstance(input_item, dict) else None)
                    print("Exception: %s on url %s" % (str(e), url))

                bar.update()

        common.dumpJSON(output_data, output_file_path)
Ejemplo n.º 4
0
    def __call__(self, city_urls_file_path, posts_urls_file_path):
        """Collect the post URLs of every city and write them out as JSON.

        The input JSON maps a city to its URL.  For each city the result
        holds the original ``city_url`` and the ``post_urls`` returned by
        ``self.getPostURLsFromCityURL``; cities keep their input order.

        Args:
            city_urls_file_path: JSON file mapping city to city URL.
            posts_urls_file_path: Destination passed to ``common.dumpJSON``.
        """
        city_urls = common.loadJSON(city_urls_file_path)
        collected = OrderedDict()

        progress = tqdm.tqdm(total=len(city_urls))
        for name, url in city_urls.items():
            # Reset the counter before each city.
            # NOTE(review): presumably consumed by getPostURLsFromCityURL — confirm.
            self.count = 0
            collected[name] = {
                "city_url": url,
                "post_urls": self.getPostURLsFromCityURL(city_url=url),
            }
            progress.update()
        progress.close()

        common.dumpJSON(collected, posts_urls_file_path)
Ejemplo n.º 5
0
    def __call__(self, posts_urls_file_path, posts_file_path):
        """Download every listed post, tag it with its city, dump as JSON.

        The input JSON maps a city to an object whose ``post_urls`` entry
        lists the URLs to fetch.  Each post returned by
        ``self.getPostFromURL`` gets a ``city`` field and is appended to the
        output list.  URLs that raise are reported on stdout and skipped.

        Args:
            posts_urls_file_path: JSON file mapping city to an object with a
                ``post_urls`` list.
            posts_file_path: Destination passed to ``common.dumpJSON``.
        """
        posts_urls = common.loadJSON(posts_urls_file_path)

        posts = []
        total = sum(len(item["post_urls"]) for item in posts_urls.values())
        # The context manager closes the bar even if something unexpected
        # escapes the loops.
        with tqdm.tqdm(total=total) as bar:
            for city, item in posts_urls.items():
                for url in item["post_urls"]:
                    try:
                        post = self.getPostFromURL(url)
                        post["city"] = city
                        posts.append(post)
                    except Exception as e:
                        # Still best-effort, but report the failure instead
                        # of dropping the post without a trace.
                        print("Exception: %s on url %s" % (str(e), url))

                    bar.update()

        common.dumpJSON(posts, posts_file_path)
Ejemplo n.º 6
0
 def __init__(self, city_entities_file_path) -> None:
     """Set the retry count and load the city entities from JSON.

     Args:
         city_entities_file_path: Path handed to ``common.loadJSON``; the
             parsed content is kept on ``self.city_entities``.
     """
     # NOTE(review): presumably the number of attempts for an operation
     # retried elsewhere in the class — confirm against its users.
     self.retries = 5
     self.city_entities = common.loadJSON(city_entities_file_path)