def convert_file(fname, out_arr):
    """Read a ride CSV and append one BikeRide per usable row to out_arr.

    For every row the 'Duration', 'Start date', 'End date', 'Start Station',
    'End Station' and 'Subscription Type' columns are read. Rows whose
    station lookup raises are reported on stdout and skipped. A running
    count of converted rows is printed every 10000 rows.

    Args:
        fname: path of the CSV file to read (first line is the header).
        out_arr: object with an ``append`` method that receives each
            populated BikeRide.

    Raises:
        ValueError / TypeError: propagated when a duration or date cell
            cannot be parsed; only station lookup failures are tolerated.
    """
    # Timestamp layouts tried in order; the first one that parses wins.
    date_formats = (
        "%m/%d/%y %H:%M",
        "%y-%m-%d %H:%M",
        "%Y-%m-%d %H:%M",
        "%m/%d/%Y %H:%M",
    )

    def get_date(item):
        """Parse item with the first matching layout in date_formats."""
        for fmt in date_formats[:-1]:
            try:
                return datetime.strptime(item, fmt)
            except (ValueError, TypeError):
                continue
        # The last candidate is unguarded so an unparseable value still
        # raises to the caller instead of silently yielding None.
        return datetime.strptime(item, date_formats[-1])

    def get_seconds(text):
        """Turn a duration cell into a number of seconds.

        The unit markers are rewritten so the text becomes 'H:M:S'
        (presumably the input looks like '0h 5m 30s' or '0h 5m 30sec.').
        """
        normalized = (text.replace('h ', ':')
                          .replace('m ', ':')
                          .replace('sec.', '')
                          .replace('s', ''))
        hours, minutes, seconds = map(int, normalized.split(':'))
        return hours * 60 * 60 + minutes * 60 + seconds

    # The context manager guarantees the handle is released even if
    # read_csv raises; the frame is fully loaded once read_csv returns.
    with open(fname, 'rU') as f:
        reader = read_csv(f, engine='c', header=0)

    converted = 0
    for _, row in reader.iterrows():
        r = BikeRide()
        r.duration = get_seconds(row['Duration'])
        r.start_date = get_date(row['Start date'])
        r.end_date = get_date(row['End date'])
        try:
            r.start_station = find_station(row['Start Station'])
            r.end_station = find_station(row['End Station'])
        except Exception:
            # Best effort: report the unknown station and drop the row.
            # Exception (not a bare except) lets Ctrl-C still abort the run.
            print('couldnt find %s or %s'
                  % (row['Start Station'], row['End Station']))
            continue
        r.subscribed = row['Subscription Type'] != 'Casual'
        out_arr.append(r)
        converted += 1
        if converted % 10000 == 0:
            print(converted)
def convert_file(fname, out_arr):
    """Read a ride CSV and append one BikeRide per usable row to out_arr.

    For every row the 'Duration', 'Start date', 'End date', 'Start Station',
    'End Station' and 'Subscription Type' columns are read. Rows whose
    station lookup raises are reported on stdout and skipped. A running
    count of converted rows is printed every 10000 rows.

    Args:
        fname: path of the CSV file to read (first line is the header).
        out_arr: object with an ``append`` method that receives each
            populated BikeRide.

    Raises:
        ValueError / TypeError: propagated when a duration or date cell
            cannot be parsed; only station lookup failures are tolerated.
    """
    # Timestamp layouts tried in order; the first one that parses wins.
    date_formats = (
        "%m/%d/%y %H:%M",
        "%y-%m-%d %H:%M",
        "%Y-%m-%d %H:%M",
        "%m/%d/%Y %H:%M",
    )

    def get_date(item):
        """Parse item with the first matching layout in date_formats."""
        for fmt in date_formats[:-1]:
            try:
                return datetime.strptime(item, fmt)
            except (ValueError, TypeError):
                continue
        # The last candidate is unguarded so an unparseable value still
        # raises to the caller instead of silently yielding None.
        return datetime.strptime(item, date_formats[-1])

    def get_seconds(text):
        """Turn a duration cell into a number of seconds.

        The unit markers are rewritten so the text becomes 'H:M:S'
        (presumably the input looks like '0h 5m 30s' or '0h 5m 30sec.').
        """
        normalized = (text.replace('h ', ':')
                          .replace('m ', ':')
                          .replace('sec.', '')
                          .replace('s', ''))
        hours, minutes, seconds = map(int, normalized.split(':'))
        return hours * 60 * 60 + minutes * 60 + seconds

    # The context manager guarantees the handle is released even if
    # read_csv raises; the frame is fully loaded once read_csv returns.
    with open(fname, 'rU') as f:
        reader = read_csv(f, engine='c', header=0)

    converted = 0
    for _, row in reader.iterrows():
        r = BikeRide()
        r.duration = get_seconds(row['Duration'])
        r.start_date = get_date(row['Start date'])
        r.end_date = get_date(row['End date'])
        try:
            r.start_station = find_station(row['Start Station'])
            r.end_station = find_station(row['End Station'])
        except Exception:
            # Best effort: report the unknown station and drop the row.
            # Exception (not a bare except) lets Ctrl-C still abort the run.
            print('couldnt find %s or %s'
                  % (row['Start Station'], row['End Station']))
            continue
        r.subscribed = row['Subscription Type'] != 'Casual'
        out_arr.append(r)
        converted += 1
        if converted % 10000 == 0:
            print(converted)
# Build a BikeRide for every remaining record in dct, collect the ones whose
# stations resolve into ride_ls, and persist them in a single commit.
# Records whose station lookup raises are counted and kept in failed_ls.
ride_date_format = "%Y-%m-%dT%H:%M:%S"
try:
    # Discard the first record the reader yields before processing the rest.
    next(dct)
    for r in dct:
        i += 1
        if i % 10000 == 0:
            print(i)  # progress marker every 10000 records read
        ride = BikeRide()
        ride.start_date = datetime.strptime(r['start date'], ride_date_format)
        ride.end_date = datetime.strptime(r['end date'], ride_date_format)
        ride.duration = r['duration']
        ride.subscribed = r['user type'] != 'casual'
        try:
            ride.start_station = find_station(r['start station'])
            ride.end_station = find_station(r['end station'])
        except Exception:
            # Best effort: remember the record and move on. Catching
            # Exception (not a bare except) keeps Ctrl-C working.
            failed += 1
            print('fail number %s' % (failed,))
            failed_ls.append(r)
            continue
        ride_ls.append(ride)
finally:
    # Release the input file even when a record fails to parse.
    f.close()
db_session.add_all(ride_ls)
db_session.commit()

#
# SUBWAY STATIONS
#
# Stream ride_data.json record by record and note every record that cannot
# be turned into a BikeRide; nothing is stored for the successful ones here.
f = open('ride_data.json', 'r')
# ijson lets us stream the file rather than open it all at once --- too big
dct = ijson.items(f, 'item')
ride_date_format = "%Y-%m-%dT%H:%M:%S"
i = 0
failed = 0
for r in dct:
    i += 1
    if i % 10000 == 0:
        print(i)  # progress marker every 10000 records read
    ride = BikeRide()
    try:
        ride.start_station = find_station(r['start station'])
        ride.end_station = find_station(r['end station'])
    except Exception:
        # Report only; the record still goes through the field checks below.
        print('failed on station %s or %s'
              % (r['start station'], r['end station']))
    try:
        ride.start_date = datetime.strptime(r['start date'], ride_date_format)
        ride.end_date = datetime.strptime(r['end date'], ride_date_format)
        ride.duration = r['duration']
        ride.subscribed = r['user type'] != 'casual'
        # ride_ls.append(ride)
    except Exception as e:
        # Bind the exception: the handler records it, and without the
        # binding the reference to e raised NameError on the first failure.
        failed += 1
        print('fail number %s' % (failed,))
        failure = {'ride': r, 'exception': e}
        print(failure)
        failed_ls.append(failure)
# NOTE(review): f is never closed in the visible code -- confirm it is
# closed further down, or wrap the loop in a with-block.