Esempio n. 1
0
    def _import_db_backup(self):
        """Import offers from a local SQLite backup into ``GeorezoRSS``.

        Reads every row of the ``elpaso_annonce`` table from the
        ``elpaso_new.sqlite`` file (path relative to the current working
        directory) and creates one ``GeorezoRSS`` record per row whose
        ``id_rss`` is not stored yet.

        Columns are read by position: 0 = offer ID, 1 = title, 2 = content,
        3 = publication date as an RSS-style string
        (``ddd, D MMM YYYY HH:mm:ss Z``).

        Rows that cannot be imported (duplicate ID, unparsable date, save
        failure) are logged and skipped so one bad row does not abort the
        whole import.
        """
        # connects to input database; the finally clause guarantees the
        # connection is released even if the query fails
        conn = sqlite3.connect('elpaso_new.sqlite')
        try:
            db_cursor = conn.cursor()
            db_cursor.execute("SELECT * FROM elpaso_annonce")
            input_rows = db_cursor.fetchall()
        finally:
            conn.close()
        print(len(input_rows))

        for input_row in input_rows:
            print(input_row[0], type(input_row), len(input_row))
            offer_id = input_row[0]

            # check if id already exists
            if GeorezoRSS.objects.filter(id_rss=offer_id).exists():
                logging.error("Offer ID already exists: {}".format(offer_id))
                continue

            try:
                # date parsing is done inside the try block so that a
                # malformed date only skips the current row
                publication_date = arrow.get(input_row[3],
                                             "ddd, D MMM YYYY HH:mm:ss Z")
                offer = GeorezoRSS(id_rss=offer_id,
                                   title=input_row[1],
                                   content=input_row[2],
                                   date_pub=publication_date.format(),
                                   source=1)
                offer.save()
                logging.info("New offer added: {}".format(offer_id))
            except IntegrityError:
                # in case of duplicated offer (e.g. inserted concurrently
                # between the exists() check and save())
                logging.error("Offer ID already exists: {}".format(offer_id))
                continue
            except Exception as e:
                # best effort: report the failure and go on with next row
                logging.error(e)
Esempio n. 2
0
    def _add_new_offers(self):
        """Fetch the GeoRezo RSS feed and store the offers not processed yet.

        The ID of the last processed offer is read from
        ``last_id_georezo.txt`` (resolved against the current working
        directory). Every feed entry whose ID is greater than that value is
        saved as a ``GeorezoRSS`` record. The ID of the first feed entry
        (treated by this code as the most recent one) is written back to the
        file for the next run. When at least one offer has been added, the
        list of new IDs is handed to ``Analizer``.

        Entries with a malformed ID, an unparsable publication date or a
        failing save are logged and skipped.

        Raises:
            OSError, ValueError: if ``last_id_georezo.txt`` cannot be read or
                its first line is not an integer.
        """
        # Get the id of the last offer parsed
        with open(path.abspath(r"last_id_georezo.txt"), "r") as fichier:
            last_id = int(fichier.readline())
        logger.info("Previous offer ID: {}".format(last_id))

        # list to store the IDs of the offers added during this run
        li_id = []

        # RSS parser
        feed = feedparser.parse("http://georezo.net/extern.php?fid=10")
        logger.info("Parser created")

        # looping on feed entries; enumerate gives the position directly
        # instead of searching the list again for every entry
        for idx, entry in enumerate(feed.entries):
            # get the ID cleaning 'link' markup
            try:
                job_id = int(entry.id.split("#")[1].lstrip("p"))
            except (AttributeError, IndexError, ValueError) as e:
                # missing id attribute, no '#' separator or non numeric ID
                logger.error("Feed index corrupted: {} - ({})".format(idx, e))
                continue

            # first offer parsed is the last published, so the biggest ID.
            # Put the ID in the text file dedicated.
            if idx == 0:
                with open(path.abspath(r"last_id_georezo.txt"), "w") as fichier:
                    fichier.write(str(job_id))

            # an ID lower or equal to the stored one means the offer has
            # already been processed
            if job_id <= last_id:
                logger.info("Offer ID inferior to the last registered: {}".format(job_id))
                continue

            try:
                # formatting publication date; parsed only for new offers and
                # inside the try block so a bad date skips just this entry
                publication_date = arrow.get(entry.published, "ddd, D MMM YYYY HH:mm:ss Z")
                offer = GeorezoRSS(
                    id_rss=job_id,
                    title=entry.title,
                    content=entry.summary,
                    date_pub=publication_date.format(),
                    source=True,
                )
                offer.save()
            except IntegrityError:
                # in case of duplicated offer
                logger.error("Offer ID already exists: {}".format(job_id))
                continue
            except Exception as e:
                # best effort: report the failure and go on with next entry
                logger.error(e)
                continue

            # adding the offer's ID to the list of new offers to process
            li_id.append(job_id)
            logger.info("New offer added: {}".format(job_id))

        # after loop
        logger.info("{} offers have been added.".format(len(li_id)))

        # if new offers => launch next processes
        if li_id:
            # log info
            logger.info("New offers IDs: " + str(li_id))
            # analyzing offers
            Analizer(li_id)