예제 #1
0
    def __init__(self):
        """
        Set up the instance from ``./config.json``.

        Stores the ``event_labels`` entry of the config file, the base URL of
        the planerkulturalny.pl events endpoint (a start date is appended to
        it by callers) and a ``Scrap`` helper instance.
        """
        with open('./config.json') as config_file:
            settings = json.load(config_file)
        labels = settings["event_labels"]
        self.event_labels = labels

        # Base URL; it ends with "start_date=" so a date can be appended.
        self.url_today = "https://planerkulturalny.pl/api/rest/events.json?start_date="
        self.scrap = Scrap()
예제 #2
0
    def __init__(self):
        """
        Set up the instance from ``./config.json``.

        Stores the ``event_labels`` entry of the config file, the base URL of
        the go.wroclaw.pl events endpoint (it ends with ``time-from=`` so a
        date can be appended), the list of known category names and a
        ``Scrap`` helper instance.
        """
        with open('./config.json') as config_file:
            settings = json.load(config_file)
        labels = settings["event_labels"]
        self.event_labels = labels

        self.url_today = "http://go.wroclaw.pl/api/v1.0/events?key=1011488156695333384118402645947989718531&time-from="
        # Category names, one per entry, in a fixed order.
        known_categories = [
            "Sport",
            "Kultura",
            "Koncert",
            "Targi",
            "Inne",
            "Hackaton",
            "Rozrywka",
            "Dziecko",
        ]
        self.category = known_categories
        self.scrap = Scrap()
예제 #3
0
    def __init__(self):
        """
        Set up the instance from ``./config.json``.

        Stores the ``event_labels`` entry of the config file, the two
        poznan.pl web-service URLs (current-day events, and events up to a
        date that callers append after ``dateTo=``), the list of known
        category names and a ``Scrap`` helper instance.
        """
        with open('./config.json') as config_file:
            settings = json.load(config_file)
        labels = settings["event_labels"]
        self.event_labels = labels
        self.url_today = "http://www.poznan.pl/mim/public/ws-information/?co=getCurrentDayEvents"
        self.url_to_given_day = "http://www.poznan.pl/mim/public/ws-information/?co=getEventsToDate&dateTo="
        # Category names, one per entry, in a fixed order.
        known_categories = [
            "Sport",
            "Kultura",
            "Koncert",
            "Targi",
            "Inne",
            "Hackaton",
            "Rozrywka",
            "Dziecko",
        ]
        self.category = known_categories

        self.scrap = Scrap()
예제 #4
0
class fillDB:
    """
    Gather scraped kiwiportal.pl events and post them to the event service.

    Only the kiwiportal scraper is used; the Poznan API source and the
    save/read-to-text-file debugging path are disabled in this version.
    """

    def __init__(self, *args, **kwargs):
        """Create the ``Scrap`` helper; extra arguments are accepted but unused."""
        self.s = Scrap()

    def getEvents(self, dateStart=None, dateEnd=None):
        """
        Scrape every configured kiwiportal city page and combine the results.

        :param dateStart: accepted but not used
        :param dateEnd: accepted but not used
        :return: the per-city scrape results joined with ``+`` in page order
        """
        city_pages = (
            'https://www.kiwiportal.pl/wydarzenia/m/warszawa',
            'https://www.kiwiportal.pl/wydarzenia/m/krakow',
            'https://www.kiwiportal.pl/wydarzenia/m/trojmiasto',
            'https://www.kiwiportal.pl/wydarzenia/m/poznan',
            'https://www.kiwiportal.pl/wydarzenia/m/zakopane',
            'https://www.kiwiportal.pl/wydarzenia/m/wroclaw',
        )
        first_page, *remaining_pages = city_pages
        collected = self.s.scrap_kiwiportal(first_page)
        for page in remaining_pages:
            collected = collected + self.s.scrap_kiwiportal(page)
        return collected

    def load_to_database(self):
        """
        Post every event returned by ``getEvents`` to the create endpoint.

        Each event is a JSON string; it is decoded and sent as the JSON body
        of a POST request, and the response status code is printed.
        """
        scraped_events = self.getEvents()
        endpoint = "http://localhost:9000/event/create"
        for serialized in scraped_events:
            payload = json.loads(serialized)
            response = requests.post(endpoint, json=payload)
            print("Status code: {}".format(response.status_code))
예제 #5
0
class EventAPI_Wroclaw:
    """
    Fetch events from the go.wroclaw.pl API and convert them to JSON strings.

    Each produced string is a JSON object with an ``event`` dictionary and a
    ``categories`` list of numeric category ids.
    """

    def __init__(self):
        """Load ``event_labels`` from ``./config.json`` and set up URL, categories and scraper."""
        with open('./config.json') as f:
            config_json = json.load(f)
        self.event_labels = config_json["event_labels"]

        # NOTE(review): the API key is hard-coded in the URL; consider moving
        # it to configuration.
        self.url_today = "http://go.wroclaw.pl/api/v1.0/events?key=1011488156695333384118402645947989718531&time-from="
        # Position in this list + 1 is the numeric category id.
        self.category = [
            "Sport", "Kultura", "Koncert", "Targi", "Inne", "Hackaton",
            "Rozrywka", "Dziecko"
        ]
        self.scrap = Scrap()

    def _range_url(self, date, end_date):
        """
        Build the request URL for events between two dates.

        :param date: start date string, appended after ``time-from=``
        :param end_date: end date string, appended after ``time-to=``
        :return: full request URL
        """
        return self.url_today + date + "&time-to=" + end_date

    @staticmethod
    def _date_converter(o):
        """``json.dumps`` fallback: ISO-format dates; anything else becomes ``null``."""
        if isinstance(o, (datetime.date, datetime.datetime)):
            return o.isoformat()
        return None

    def _save_json(self, url, name):
        """
        METHOD TO TESTS
        Save json from requests
        :param url: address url
        :param name: file name
        """
        _json = requests.get(url)

        with open(name, "w", encoding="UTF-8") as f:
            f.write(_json.text)

    def _save_json_today(self, date):
        """
        METHOD TO TESTS
        Save the events of a single day to ``events_today_Wroclaw.json``
        :param date: current day
        """
        self._save_json(self._range_url(date, date),
                        "events_today_Wroclaw.json")

    def _save_json_to_given_day(self, date, end_date):
        """
        METHOD TO TESTS
        Save the events of a date range to ``events_to_given_day_Wroclaw.json``
        :param date: current day
        :param end_date: current day + 7 days
        """
        # The original sent "&tome-to=", a typo of the "time-to" parameter.
        self._save_json(self._range_url(date, end_date),
                        "events_to_given_day_Wroclaw.json")

    def get_json(self, name_json):
        """
        METHOD TO TESTS
        Read json from file and decode it
        :param name_json: name of json
        :return: decoded JSON content
        """
        with open(name_json, "r", encoding="UTF-8") as f:
            r_json = f.read()

        result = json.loads(r_json)
        return result

    def make_request_and_get_json(self, url):
        """
        Make a GET request and decode the response body as JSON
        :param url: address url
        :return: decoded JSON content
        """
        r_json = requests.get(url)
        result = json.loads(r_json.text)
        return result

    def _get_first_sentence(self, long_description):
        """
        Get short Description from long description
        :param long_description: event long description
        :return: text before the first "." followed by 3 dots.
        """
        result = str(long_description.split(".")[0])
        return result + "..."

    def _check_category(self, category):
        """
        Map a category name to its numeric id
        :param category: category name from the API
        :return: 1 if the name contains the word "Biegi", the 1-based
                 position in ``self.category`` for a known name, else 5
        """
        #1-sport, 2-Kultura, 3-Koncert, 4-Targi, 5-Inne, 6-Hackaton 7 rozrywka 8 dziecko

        if "Biegi" in category.split(" "):
            return 1
        elif category in self.category:
            return self.category.index(category) + 1
        else:
            return 5

    def parse_data(self, date):
        """
        Convert a ``YYYY-MM-DD`` string to a datetime at midnight
        :param date: date string
        :return: ``datetime.datetime``, or False when ``date`` has no
                 ``split`` method (e.g. None)
        """
        try:
            year, month, day = date.split('-')
        except AttributeError:
            return False
        return datetime.datetime(int(year), int(month), int(day), 0, 0, 0)

    def _extract_event(self, dic):
        """
        Build the ``event`` dictionary (without geoJSON) for one API item
        :param dic: one element of the response's ``items`` list
        :return: dict of event fields
        :raises KeyError, TypeError, AttributeError, ValueError: when a
                required field is missing or malformed
        """
        offer = dic["offer"]
        details = {
            "name": offer["title"],
            "shortDescription": self._get_first_sentence(
                offer["longDescription"]),
            "longDescription": offer["longDescription"],
            "creationDate": datetime.datetime.now(),
            "eventStart": self.parse_data(dic["startDate"].split("T")[0]),
            "eventEnd": self.parse_data(dic["endDate"].split("T")[0]),
            "ownerId": 1,
        }

        # The image is optional; fall back to an empty string.
        try:
            details["imageSource"] = offer["mainImage"]["standard"]
        except (KeyError, IndexError, TypeError):
            details["imageSource"] = ""

        address = dic["address"]
        try:
            details["address"] = ("Polska, " + address["street"] + " " +
                                  address["zipCode"])
        except (KeyError, TypeError):
            # No usable zip code: use the city name instead.
            details["address"] = ("Polska, " + address["street"] + " " +
                                  address["city"])
        details["addressCity"] = address["city"]
        return details

    def parse_json(self, list_of_dic):
        """
        Convert an API response into a list of unique event JSON strings

        Items missing a required field are skipped. Previously such an item
        was swallowed by a bare ``except`` and then raised KeyError on the
        address lookup, aborting the whole batch.

        :param list_of_dic: decoded response; its ``items`` entry is iterated
        :return: list of JSON strings, one per distinct event
        """
        EVENT = []
        seen = set()  # O(1) duplicate check; EVENT keeps insertion order

        #['name', 'shortDescription', 'longDescription', 'creationDate', 'eventStart', 'eventEnd', 'ownerId', 'geoJSON', 'imageSource',  'address', addressCity]

        for dic in list_of_dic["items"]:
            try:
                details = self._extract_event(dic)
            except (KeyError, IndexError, TypeError, AttributeError,
                    ValueError):
                continue

            details["geoJSON"] = str(
                self.scrap.create_geojson(query=details["address"]))

            try:
                categories = [
                    self._check_category(dic["offer"]["categories"][0]["name"])
                ]
            except (KeyError, IndexError, TypeError, AttributeError):
                categories = [5]  #Inne

            event = {"event": details, "categories": categories}
            e = json.dumps(event,
                           ensure_ascii=False,
                           default=self._date_converter)
            if e not in seen:
                seen.add(e)
                EVENT.append(e)

        return EVENT

    def get_event_today(self):
        """
        Request events starting from today's date and parse them
        :return: list of event JSON strings
        """
        str_date = str(datetime.datetime.now().date())

        #self._save_json_today(str_date) #to test
        #list_of_dict = self.get_json("events_today_Wroclaw.json") #to test

        list_of_dict = self.make_request_and_get_json(self.url_today +
                                                      str_date)
        return self.parse_json(list_of_dict)

    def get_event_7days(self):
        """
        Request events from today until seven days from now and parse them
        :return: list of event JSON strings
        """
        today = datetime.datetime.now()
        str_date = str(today.date())
        end_date = datetime.datetime.now() + datetime.timedelta(
            days=7)  # get current day and add 7 days
        str_end_date = str(end_date.date())

        #self._save_json_to_given_day(str_date, str_end_date) #to test
        #events_json = self.get_json("events_to_given_day_Wroclaw.json") #to test

        # The original sent "&time_to=", inconsistent with "time-from" /
        # "time-to" used elsewhere in this class.
        events_json = self.make_request_and_get_json(
            self._range_url(str_date, str_end_date))
        return self.parse_json(events_json)
예제 #6
0
class EventAPI_Poznan:
    """
    Fetch events from the poznan.pl XML web service and convert them to JSON strings.

    Each produced string is a JSON object with an ``event`` dictionary (keyed
    by the configured ``event_labels``) and a ``categories`` list.
    """

    def __init__(self):
        """Load ``event_labels`` from ``./config.json`` and set up URLs, categories and scraper."""
        with open('./config.json') as f:
            config_json = json.load(f)
        self.event_labels = config_json["event_labels"]
        self.url_today = "http://www.poznan.pl/mim/public/ws-information/?co=getCurrentDayEvents"
        self.url_to_given_day = "http://www.poznan.pl/mim/public/ws-information/?co=getEventsToDate&dateTo="
        # Position in this list + 1 is the numeric category id.
        self.category = [
            "Sport", "Kultura", "Koncert", "Targi", "Inne", "Hackaton",
            "Rozrywka", "Dziecko"
        ]

        self.scrap = Scrap()

    @staticmethod
    def _date_converter(o):
        """``json.dumps`` fallback: ISO-format dates; anything else becomes ``null``."""
        if isinstance(o, (datetime.date, datetime.datetime)):
            return o.isoformat()
        return None

    def _save_xml(self, url, name):
        """
        METHOD TO TESTS
        Save xml from url
        :param url: address url
        :param name: file name
        """
        request = requests.get(url)
        xml = request.text
        with open(name, "w", encoding="UTF-8") as f:
            f.write(xml)

    def _save_xml_today(self):
        """
        METHOD TO TESTS
        Save the current-day events response to ``events_today_Poznan.xml``
        """
        self._save_xml(self.url_today, "events_today_Poznan.xml")

    def _save_xml_to_given_day(self, date):
        """
        METHOD TO TESTS
        Save the events-to-date response to ``events_to_given_day_Poznan.xml``
        :param date: end date string, e.g. 2019-05-01
        """

        self._save_xml(self.url_to_given_day + date,
                       "events_to_given_day_Poznan.xml")

    def get_xml(self, name_xml):
        """
        METHOD TO TESTS
        Read xml from file
        :param name_xml: path of the xml file
        :return: root element of the parsed document
        """
        with open(name_xml, "r", encoding="UTF-8") as f:
            xml = f.read()

        # fromstring() already returns the root element.
        return ET.fromstring(xml)

    def make_request_and_get_root(self, url):
        """
        Make request and return a root of xml file
        :param url: address url
        :return: root of xml file
        """
        request = requests.get(url)
        return ET.fromstring(request.text)

    def _get_first_sentence(self, long_description):
        """
        Get short Description from long description
        :param long_description: event long description
        :return: text before the first "." followed by 3 dots.
        """
        result = str(long_description.split(".")[0])

        return result + "..."

    def _check_category(self, category):
        """
        Map a category name to its numeric id
        :param category: category text from the XML
        :return: 2 if the text contains the word "Kultura", the 1-based
                 position in ``self.category`` for a known name, else 5
        """
        # 1-sport, 2-Kultura, 3-Koncert, 4-Targi, 5-Inne, 6-Hackaton 7 rozrywka 8 dziecko

        if "Kultura" in category.split(" "):
            return 2
        elif category in self.category:
            return self.category.index(category) + 1
        elif "Konferencje," in category.split(" "):
            # Conferences currently share the "Inne" id with the fallback.
            return 5  #KONFERENCJA
        else:
            return 5

    def parse_data(self, date, date1=None):
        """
        Convert a ``YYYY-MM-DD HH:MM:SS`` string to a datetime (seconds set to 0)
        :param date: date-time string
        :param date1: when not None it replaces ``date`` entirely
        :return: ``datetime.datetime``, or False when the value has no
                 ``split`` method (e.g. None)
        """
        # NOTE(review): date1 overrides date unconditionally, so a caller
        # passing both always gets date1 parsed -- confirm this is intended
        # rather than a fallback for a missing date.
        if date1 is not None:
            date = date1

        try:
            parts = date.split(' ')
            year, month, day = parts[0].split('-')
            hour, minute, _seconds = parts[1].split(':')
        except AttributeError:
            return False
        return datetime.datetime(int(year), int(month), int(day), int(hour),
                                 int(minute), 0)

    def parse_xml(self, root):
        """
        Convert the XML response into a list of unique event JSON strings

        Fields are read from each child element by position. An element that
        fails in any way is skipped; only ``Exception`` subclasses are
        swallowed, so KeyboardInterrupt/SystemExit still propagate.
        Collection stops once more than 10 events have been gathered.

        :param root: xml root
        :return: list of JSON strings
        """
        EVENT = []

        for elem in root:
            try:
                url = elem[2].text  #event_url
                image_url = get_image_src(url)  # get image src from scrapping
                category = self._check_category(elem[10].text)
                address = elem[5][2].text
                geoJSON = self.scrap.create_geojson(query=address)
                long_description = elem[3][0][2].text

                event_array = [
                    elem[3][0][0].text,  #name
                    self._get_first_sentence(
                        long_description),  #shortDescription
                    long_description,  #longDescription
                    self.parse_data(elem[1].text),  #creationDate
                    self.parse_data(elem[7].text),  #eventStart
                    self.parse_data(elem[8].text, elem[7].text),  #eventEnd
                    1,  #ownerId
                    str(geoJSON),  #geoJSON
                    image_url,  #imageSource
                    address,  #address
                    "Poznan",  #addressCity
                ]
                event = {
                    'event': dict(zip(self.event_labels, event_array)),
                    'categories': [category],
                }

                e = json.dumps(event,
                               ensure_ascii=False,
                               default=self._date_converter)
                if e not in EVENT:
                    EVENT.append(e)

                if len(EVENT) > 10:
                    break
            except Exception:
                # Best effort: one bad element must not abort the batch.
                continue

        return EVENT

    def get_event_today(self):
        """
        Request the current-day events and parse them
        :return: list of event JSON strings
        """
        #self._save_xml_today() #to test
        #root = self.get_xml("events_today_Poznan.xml") #to test
        root = self.make_request_and_get_root(self.url_today)
        return self.parse_xml(root)

    def get_event_7days(self):
        """
        Request the events up to seven days from now and parse them
        :return: list of event JSON strings
        """
        #self._save_xml_to_given_day(date) #to test
        #root = self.get_xml("events_to_given_day_Poznan.xml") #to test
        date = datetime.datetime.now() + datetime.timedelta(
            days=7)  #get current day and add 7 days
        str_date = str(date.date())
        root = self.make_request_and_get_root(self.url_to_given_day + str_date)
        return self.parse_xml(root)
예제 #7
0
class EventAPI_Gdansk:
    """
    Fetch events from the planerkulturalny.pl API and convert them to JSON strings.

    Each produced string is a JSON object with an ``event`` dictionary and a
    ``categories`` list of numeric category ids.
    """

    def __init__(self):
        """Load ``event_labels`` from ``./config.json`` and set up URL, category map and scraper."""
        with open('./config.json') as f:
            config_json = json.load(f)
        self.event_labels = config_json["event_labels"]

        self.url_today = "https://planerkulturalny.pl/api/rest/events.json?start_date="
        # Category id -> name map filled by get_category(). It was never
        # created before, so get_category() raised AttributeError.
        self.category = {}
        self.scrap = Scrap()

    def _range_url(self, date, end_date):
        """
        Build the request URL for events between two dates.

        :param date: start date string, appended after ``start_date=``
        :param end_date: end date string, appended after ``end_date=``
        :return: full request URL
        """
        return self.url_today + date + "&end_date=" + end_date

    @staticmethod
    def _date_converter(o):
        """``json.dumps`` fallback: ISO-format dates; anything else becomes ``null``."""
        if isinstance(o, (datetime.date, datetime.datetime)):
            return o.isoformat()
        return None

    def _save_json(self, url, name):
        """
        METHOD TO TESTS
        Save json from requests
        :param url: address url
        :param name: file name
        """
        _json = requests.get(url)

        with open(name, "w", encoding="UTF-8") as f:
            f.write(_json.text)

    def _save_json_today(self, date):
        """
        METHOD TO TESTS
        Save the events starting at ``date`` to ``events_today_Gdansk.json``
        :param date: current day
        """
        self._save_json(self.url_today + date, "events_today_Gdansk.json")

    def _save_json_to_given_day(self, date, end_date):
        """
        METHOD TO TESTS
        Save the events of a date range to ``events_to_given_day_Gdansk.json``
        :param date: current day
        :param end_date: current day + 7 days
        """
        self._save_json(self._range_url(date, end_date),
                        "events_to_given_day_Gdansk.json")

    def get_json(self, name_json):
        """
        METHOD TO TESTS
        Read json from file and decode it
        :param name_json: name of json
        :return: decoded JSON content
        """
        with open(name_json, "r", encoding="UTF-8") as f:
            r_json = f.read()

        result = json.loads(r_json)
        return result

    def make_request_and_get_json(self, url):
        """
        Make a GET request and decode the response body as JSON
        :param url: address url
        :return: decoded JSON content
        """
        r_json = requests.get(url)
        result = json.loads(r_json.text)
        return result

    def _get_first_sentence(self, long_description):
        """
        Get short Description from long description
        :param long_description: event long description
        :return: text before the first "." followed by 3 dots.
        """
        result = str(long_description.split(".")[0])
        return result + "..."

    def get_category(self, event):
        """
        Record a category id -> name pair in ``self.category`` if not yet known
        :param event: mapping with ``category`` (id) and ``name`` entries
        :return: None
        """
        if event["category"] not in self.category:
            self.category[event["category"]] = event["name"]

        # 96 - Kultura 19 - Teatr 51- Sztuka  1 - Sztuka/Kultura/Kino
        # 77 - Sport  83 - Kultura 35 - Teatr/Muzyka

    def parse_data(self, date):
        """
        Convert a ``YYYY-MM-DD`` string to a datetime at midnight
        :param date: date string
        :return: ``datetime.datetime``, or False when ``date`` has no
                 ``split`` method (e.g. None)
        """
        try:
            year, month, day = date.split('-')
        except AttributeError:
            return False
        return datetime.datetime(int(year), int(month), int(day), 0, 0, 0)

    def parse_json(self, list_of_dic):
        """
        Convert an API response into a list of unique event JSON strings

        Items that fail to convert are skipped. Only ``Exception`` subclasses
        are swallowed, so KeyboardInterrupt/SystemExit still propagate.

        :param list_of_dic: iterable of event dictionaries from the API
        :return: list of JSON strings, one per distinct event
        """

        EVENT = []
        #['name', 'shortDescription', 'longDescription', 'creationDate', 'eventStart', 'eventEnd', 'ownerId', 'geoJSON', 'imageSource', 'category', 'address', addressCity]

        for dic in list_of_dic:
            event = {
                "event": {},
                "categories": "",
            }
            details = event["event"]
            try:
                details["name"] = dic["name"]
                details["shortDescription"] = self._get_first_sentence(
                    dic["descLong"])
                details["longDescription"] = dic["descLong"]
                details["creationDate"] = datetime.datetime.now()
                details["eventStart"] = self.parse_data(
                    dic["startDate"].split("T")[0])
                details["eventEnd"] = self.parse_data(
                    dic["endDate"].split("T")[0])
                details["ownerId"] = 1

                # Geocoding is best effort; fall back to an empty object.
                try:
                    details["geoJSON"] = str(
                        self.scrap.create_geojson(query="Polska, Gdańsk " +
                                                  dic['place']['name']))
                except Exception:
                    details["geoJSON"] = "{}"

                # The first attachment, when present, is used as the image.
                try:
                    details["imageSource"] = dic["attachments"][0]["fileName"]
                except (KeyError, IndexError, TypeError):
                    details["imageSource"] = None

                if dic["categoryId"] == 77:
                    event["categories"] = [1]  # sport
                else:
                    # NOTE(review): the original comment said "kultura", but
                    # in the sibling classes id 5 is "Inne" and "Kultura" is
                    # 2 -- confirm the intended id.
                    event["categories"] = [5]

                details["address"] = "Gdańsk, " + dic["place"]["name"]
                details["addressCity"] = "Gdańsk"

                e = json.dumps(event,
                               ensure_ascii=False,
                               default=self._date_converter)
                if e not in EVENT:
                    EVENT.append(e)

            except Exception:
                # Best effort: one bad item must not abort the batch.
                continue

        return EVENT

    def get_event_today(self):
        """
        Request events starting from today's date and parse them
        :return: list of event JSON strings
        """
        str_date = str(datetime.datetime.now().date())

        #self._save_json_today(str_date) #to test
        #list_of_dict = self.get_json("events_today_Gdansk.json") #to test

        list_of_dict = self.make_request_and_get_json(self.url_today +
                                                      str_date)
        return self.parse_json(list_of_dict)

    def get_event_7days(self):
        """
        Request events from today until seven days from now and parse them
        :return: list of event JSON strings
        """
        today = datetime.datetime.now()
        str_date = str(today.date())
        end_date = datetime.datetime.now() + datetime.timedelta(
            days=7)  # get current day and add 7 days
        str_end_date = str(end_date.date())

        #self._save_json_to_given_day(str_date, str_end_date) #to test
        #events_json = self.get_json("events_to_given_day_Gdansk.json") #to test

        events_json = self.make_request_and_get_json(
            self._range_url(str_date, str_end_date))
        return self.parse_json(events_json)
예제 #8
0
 def __init__(self, *args, **kwargs):
     """Create the ``Scrap`` helper and keep it as ``self.s``; extra arguments are unused."""
     scraper = Scrap()
     self.s = scraper