Ejemplo n.º 1
0
def main():
    """Populate the database from the three ``*_plus.csv`` files in ``./data``.

    Each file is read with ``csv.reader`` and its first row is skipped.
    One model object is added to ``db.session`` per remaining row, the
    session is flushed whenever the 1-based row counter is a multiple of
    1000, and it is committed once the file has been read to the end.

    Business rows whose first column is already a key of ``rep`` are
    skipped; otherwise the row counter is stored under that key.  ``rep``,
    ``preprocess`` and ``isFloat`` are defined outside this function.
    """
    with open("./data/businesses_plus.csv") as businesses_csv:
        rows = csv.reader(businesses_csv, delimiter=',')
        for row_no, fields in enumerate(rows, 1):
            if row_no == 1:
                # Skip the first row (presumably the column header).
                continue
            business_key = fields[0]
            if business_key in rep:
                # Only the first occurrence of a business id is imported.
                continue
            rep[business_key] = row_no

            # Values are computed in column order so a malformed row fails
            # at the same point it always did.
            business_id = int(business_key)
            name = preprocess(fields[1])
            address = preprocess(fields[2])
            postal_code = int(fields[4]) if fields[4].isdigit() else None
            latitude = float(fields[5]) if isFloat(fields[5]) else None
            longitude = float(fields[6]) if isFloat(fields[6]) else None
            phone = fields[7]
            # Digits-only phones become "XXX-XXX-XXXX"; the first character
            # is dropped (presumably a country-code prefix -- TODO confirm).
            phone_number = (
                '-'.join((phone[1:4], phone[4:7], phone[7:11]))
                if phone.isdigit() else None
            )

            db.session.add(models.Restaurant(
                id=business_id,
                name=name,
                address=address,
                postal_code=postal_code,
                latitude=latitude,
                longitude=longitude,
                phone_number=phone_number,
            ))
            if row_no % 1000 == 0:
                db.session.flush()
        db.session.commit()

    with open("./data/inspections_plus.csv") as inspections_csv:
        rows = csv.reader(inspections_csv, delimiter=',')
        for row_no, fields in enumerate(rows, 1):
            if row_no == 1:
                continue
            business_id = int(fields[0])
            raw_date = fields[2]
            inspected_on = (
                None if raw_date == ''
                else datetime.strptime(raw_date, '%Y%m%d')
            )
            raw_score = fields[1]
            score = int(raw_score) if raw_score.isdigit() else None

            db.session.add(models.Inspections(
                business_id=business_id,
                date=inspected_on,
                score=score,
                insp_type=fields[3],
            ))
            if row_no % 1000 == 0:
                db.session.flush()
        db.session.commit()

    with open("./data/violations_plus.csv") as violations_csv:
        rows = csv.reader(violations_csv, delimiter=',')
        for row_no, fields in enumerate(rows, 1):
            if row_no == 1:
                continue
            business_id = int(fields[0])
            raw_date = fields[1]
            violated_on = (
                None if raw_date == ''
                else datetime.strptime(raw_date, '%Y%m%d')
            )
            raw_type = fields[2]
            # A non-numeric type id is stored as -1 rather than None.
            type_id = int(raw_type) if raw_type.isdigit() else -1

            db.session.add(models.Violations(
                business_id=business_id,
                date=violated_on,
                typeID=type_id,
                risk_category=fields[3],
                description=fields[4],
            ))
            if row_no % 1000 == 0:
                db.session.flush()
        db.session.commit()
Ejemplo n.º 2
0
def main():
    """Import movies, links and ratings from the CSV files in ``./app/data``.

    Each file is read with ``csv.reader`` and its first row is skipped.
    One model object is added to ``db.session`` per remaining row (plus one
    ``Category`` per listed genre for movies), the session is flushed
    whenever the 1-based row counter is a multiple of 1000, and it is
    committed once the file has been read to the end.

    Progress is printed to stdout as rows are processed.  ``bracket`` and
    ``preprocess`` are defined outside this function.
    """
    with open("./app/data/movies.csv") as csvfile:
        reader = csv.reader(csvfile, delimiter=',')
        i = 0
        for data in reader:
            i += 1
            if i == 1:
                # Skip the first row (presumably the column header).
                continue
            # bracket() returns the preprocessed title text and the index
            # of its last '(' (or len(text) when there is none).
            title_text, paren_at = bracket(data[1])
            mov = models.Movie(
                id=int(data[0]),
                title=title_text[0:paren_at],
                # The four characters after the last '(' are parsed as the
                # year; titles without a '(' fall back to 2011.
                year=(int(title_text[paren_at + 1:paren_at + 5])
                      if paren_at < len(title_text) else 2011))
            genres = data[2].split('|')
            # %-formatting keeps the output identical to the old
            # ``print a, b, c`` while parsing under Python 2 and 3.
            print("%s %s %s" % (mov.id, mov.year, mov.title))
            for genre in genres:
                if genre != '(no genres listed)':
                    gre = models.Category(movie_id=int(data[0]),
                                          category=preprocess(genre))
                    print("%s %s" % (gre.movie_id, gre.category))
                    # Add only inside the branch: previously this ran for
                    # the placeholder genre too, hitting an undefined or
                    # stale ``gre``.
                    db.session.add(gre)
            db.session.add(mov)
            if i % 1000 == 0:
                db.session.flush()
        db.session.commit()

    with open("./app/data/links.csv") as csvfile:
        reader = csv.reader(csvfile, delimiter=',')
        i = 0
        for data in reader:
            i += 1
            print(i)
            if i == 1:
                continue
            link = models.Links(
                movie_id=int(data[0]),
                # The conditional must wrap the int() call, not sit inside
                # it; otherwise a non-numeric id evaluates int(None).
                imdb_link=int(data[1]) if data[1].isdigit() else None,
                moviedb_link=int(data[2]) if data[2].isdigit() else None)
            db.session.add(link)
            if i % 1000 == 0:
                db.session.flush()
        db.session.commit()

    with open("./app/data/ratings.csv") as csvfile:
        reader = csv.reader(csvfile, delimiter=',')
        i = 0
        for data in reader:
            i += 1
            if i == 1:
                continue
            rating = models.Rating(
                user_id=int(data[0]),
                movie_id=int(data[1]),
                score=float(data[2]),
                # Column 3 is passed to datetime.fromtimestamp as an
                # integer number of seconds (local time conversion).
                time=datetime.fromtimestamp(int(data[3])))
            print("%s %s %s %s" % (rating.user_id, rating.movie_id,
                                   rating.score, rating.time))
            db.session.add(rating)
            if i % 1000 == 0:
                db.session.flush()
        db.session.commit()
Ejemplo n.º 3
0
def main():
    """Load businesses, inspections and violations from ``./data`` CSVs.

    For every file the first row is discarded, then each remaining row is
    turned into a model object and added to ``db.session``.  The session
    is flushed when the 1-based line counter hits a multiple of 1000 and
    committed after the last row of the file.

    A business row is ignored when its first column is already present in
    ``rep``; otherwise its line counter is recorded there.  ``rep``,
    ``preprocess`` and ``isFloat`` come from outside this function.
    """
    with open("./data/businesses_plus.csv") as csvfile:
        reader = csv.reader(csvfile, delimiter=',')
        next(reader, None)  # drop the first row (presumably the header)
        # Counting starts at 2 because the discarded row was line 1.
        for line_no, cols in enumerate(reader, 2):
            if cols[0] in rep:
                continue
            rep[cols[0]] = line_no

            # Fields are derived in column order so a malformed row fails
            # at the same point as before.
            restaurant_id = int(cols[0])
            name = preprocess(cols[1])
            address = preprocess(cols[2])

            postal_code = None
            if cols[4].isdigit():
                postal_code = int(cols[4])

            latitude = None
            if isFloat(cols[5]):
                latitude = float(cols[5])

            longitude = None
            if isFloat(cols[6]):
                longitude = float(cols[6])

            # Digits-only phone values are rendered as "XXX-XXX-XXXX",
            # skipping the first character (presumably a country-code
            # prefix -- TODO confirm against the data).
            phone_number = None
            if cols[7].isdigit():
                digits = cols[7]
                phone_number = (digits[1:4] + '-' + digits[4:7] + '-' +
                                digits[7:11])

            restaurant = models.Restaurant(
                id=restaurant_id,
                name=name,
                address=address,
                postal_code=postal_code,
                latitude=latitude,
                longitude=longitude,
                phone_number=phone_number)
            db.session.add(restaurant)
            if line_no % 1000 == 0:
                db.session.flush()
        db.session.commit()

    with open("./data/inspections_plus.csv") as csvfile:
        reader = csv.reader(csvfile, delimiter=',')
        next(reader, None)
        for line_no, cols in enumerate(reader, 2):
            business_id = int(cols[0])

            when = None
            if cols[2] != '':
                when = datetime.strptime(cols[2], '%Y%m%d')

            score = None
            if cols[1].isdigit():
                score = int(cols[1])

            inspection = models.Inspections(
                business_id=business_id,
                date=when,
                score=score,
                insp_type=cols[3])
            db.session.add(inspection)
            if line_no % 1000 == 0:
                db.session.flush()
        db.session.commit()

    with open("./data/violations_plus.csv") as csvfile:
        reader = csv.reader(csvfile, delimiter=',')
        next(reader, None)
        for line_no, cols in enumerate(reader, 2):
            business_id = int(cols[0])

            when = None
            if cols[1] != '':
                when = datetime.strptime(cols[1], '%Y%m%d')

            # Non-numeric type ids are stored as -1 rather than None.
            type_id = -1
            if cols[2].isdigit():
                type_id = int(cols[2])

            violation = models.Violations(
                business_id=business_id,
                date=when,
                typeID=type_id,
                risk_category=cols[3],
                description=cols[4])
            db.session.add(violation)
            if line_no % 1000 == 0:
                db.session.flush()
        db.session.commit()
Ejemplo n.º 4
0
def bracket(s):
    """Preprocess ``s`` and locate its last opening parenthesis.

    Returns a ``(text, index)`` tuple where ``text`` is ``preprocess(s)``
    and ``index`` is the position of the right-most ``'('`` in ``text``,
    or ``len(text)`` when ``text`` contains none.
    """
    text = preprocess(s)
    # Scan positions from the end; the first hit is the right-most '('.
    hits = (pos for pos in reversed(range(len(text))) if text[pos] == '(')
    return (text, next(hits, len(text)))