Beispiel #1
0
def main():
    """Refresh the database and scrape the readings for every known table.

    For each entry in ``db.tables`` the page at the entry's ``'url'`` is
    downloaded and every ``<tr>`` element is inspected.  A row is kept when
    the first line of its text parses as ``dd/mm/YYYY HH:MM`` and the second
    line parses as a float; all other rows are skipped.  The kept
    ``(aware_datetime, value)`` tuples are handed to
    ``db.store_data(data, table)``, followed by a five second pause before
    the next request.

    A table whose download fails with ``urllib.error.HTTPError`` is reported
    and skipped: nothing is stored for it.

    Raises:
        Whatever ``pytz.timezone`` raises for an unrecognised ``'timezone'``
        entry; the lookup now happens once per table, before the download.
    """
    db = Database(DATABASE)
    db.update()
    total = len(db.tables)
    for count, table in enumerate(db.tables, start=1):
        url = db.tables[table]['url']
        # Resolve the zone once per table instead of once per row.
        tz = pytz.timezone(db.tables[table]['timezone'])
        try:
            # The context manager closes the HTTP response once it is parsed.
            with urlopen(url) as response:
                site = BeautifulSoup(response)
        except urllib.error.HTTPError:
            print("error on %s" % (url))
            # Without this, ``site`` would be unbound (first table) or still
            # hold the previous table's page, whose rows would then be stored
            # under the wrong table.
            continue
        data = []
        for tr in site('tr'):
            raw_datum = tr.text.strip().split('\n')
            try:
                naive = datetime.strptime(raw_datum[0], "%d/%m/%Y %H:%M")
                value = float(raw_datum[1])
            except (ValueError, IndexError):
                # Header rows, malformed rows, or rows without a value line.
                continue
            # pytz zones must be attached with localize(); passing them via
            # replace(tzinfo=...) uses the zone's raw LMT offset for many
            # zones and yields timestamps that are off by several minutes.
            data.append((tz.localize(naive), value))
        db.store_data(data, table)
        sleep(5)
        print("%d / %d is complete" % (count, total))
Beispiel #2
0
def main():
    """Refresh the database and scrape the readings for every known table.

    For each entry in ``db.tables`` the page at the entry's ``'url'`` is
    downloaded and every ``<tr>`` element is inspected.  A row is kept when
    the first line of its text parses as ``dd/mm/YYYY HH:MM`` and the second
    line parses as a float; all other rows are skipped.  The kept
    ``(aware_datetime, value)`` tuples are handed to
    ``db.store_data(data, table)``, followed by a five second pause before
    the next request.

    A table whose download fails with ``urllib.error.HTTPError`` is reported
    and skipped: nothing is stored for it.

    Raises:
        Whatever ``pytz.timezone`` raises for an unrecognised ``'timezone'``
        entry; the lookup now happens once per table, before the download.
    """
    db = Database(DATABASE)
    db.update()
    total = len(db.tables)
    for count, table in enumerate(db.tables, start=1):
        url = db.tables[table]['url']
        # Resolve the zone once per table instead of once per row.
        tz = pytz.timezone(db.tables[table]['timezone'])
        try:
            # The context manager closes the HTTP response once it is parsed.
            with urlopen(url) as response:
                site = BeautifulSoup(response)
        except urllib.error.HTTPError:
            print("error on %s" % (url))
            # Without this, ``site`` would be unbound (first table) or still
            # hold the previous table's page, whose rows would then be stored
            # under the wrong table.
            continue
        data = []
        for tr in site('tr'):
            raw_datum = tr.text.strip().split('\n')
            try:
                naive = datetime.strptime(raw_datum[0], "%d/%m/%Y %H:%M")
                value = float(raw_datum[1])
            except (ValueError, IndexError):
                # Header rows, malformed rows, or rows without a value line.
                continue
            # pytz zones must be attached with localize(); passing them via
            # replace(tzinfo=...) uses the zone's raw LMT offset for many
            # zones and yields timestamps that are off by several minutes.
            data.append((tz.localize(naive), value))
        db.store_data(data, table)
        sleep(5)
        print("%d / %d is complete" % (count, total))
Beispiel #3
0
    def _get_labeled(self, pseudo_count):
        """Gather the labeled movie rows and pass them to ``_pseudo_cases``.

        The ``main`` table of ``data/data.db`` is queried first.  When the
        query succeeds and returns exactly as many rows as
        ``Data.titles_list()`` has titles, each row is converted to a dict
        keyed by the first row's ``keys()``.  When the query raises
        ``OperationalError`` (presumably because the table does not exist
        yet -- confirm) or the row count differs, ``data.collect`` is run
        instead.

        Args:
            pseudo_count: forwarded to ``_pseudo_cases`` as ``n``.

        Returns:
            The result of ``self._pseudo_cases(data=labeled, n=pseudo_count,
            classx="0")``.
        """
        db = Database(fullpath("data/data.db"))
        data = Data(mediatype="movie")
        titles = data.titles_list()
        labeled = []

        def collect_and_store():
            # Shared by both "the table cannot be used as-is" paths.
            data.collect(args=data.titles_list(),
                         addtl_items=["boxoffice", "oscars", "review"])
            # NOTE(review): ``labeled`` is always empty when this runs and
            # the result of ``data.collect`` is discarded, so the loop below
            # never creates the table or stores a row.  It looks like the
            # collected movies were meant to be iterated here -- confirm
            # what ``data.collect`` returns before changing it.
            for position, movie in enumerate(labeled):
                if position == 0:
                    print("Making Table main")
                    db.make_table(table="main", fields=list(movie.keys()))
                db.update(row=movie)
                db.conn.commit()

        try:
            existing_rows = db.select(table="main", fields="all")
        except OperationalError:
            collect_and_store()
        else:
            if len(existing_rows) != len(titles):
                collect_and_store()
            else:
                # Column names are taken from the first row and reused for
                # every row of the result set.
                header = existing_rows[0].keys()
                labeled.extend(dict(zip(header, row)) for row in existing_rows)

        return self._pseudo_cases(data=labeled, n=pseudo_count, classx="0")