def test_single_element_all_stypes(st):
    from datetime import date as dd
    if st == dt.stype.time64:
        return
    pt = (bool if st == dt.stype.bool8 else
          int if st.ltype == dt.ltype.int else
          float if st.ltype == dt.ltype.real else
          str if st.ltype == dt.ltype.str else
          dd if st == dt.stype.date32 else
          object)
    src = [True, False, True, None] if pt is bool else \
          [1, 7, -99, 214, None, 3333] if pt is int else \
          [2.5, 3.4e15, -7.909, None] if pt is float else \
          ['Oh', 'gobbly', None, 'sproo'] if pt is str else \
          [dd(2000, 5, 5), dd(2012, 12, 12), None] if pt is dd else \
          [dt, st, list, None, {3, 2, 1}]
    df = dt.Frame(A=src, stype=st)
    frame_integrity_check(df)
    assert df.names == ("A",)
    assert df.stypes == (st,)
    for i, item in enumerate(src):
        x = df[i, 0]
        y = df[i, "A"]
        assert x == y
        if item is None or st == dt.stype.void:
            assert x is None
        else:
            assert isinstance(x, pt)
            if st == dt.stype.int8:
                assert (x - item) % 256 == 0      # int8 values wrap modulo 256
            elif st == dt.stype.float32:
                assert abs(1 - item / x) < 1e-7   # allow float32 rounding error
            else:
                assert x == item
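# A hedged sketch of how such a test is typically driven: pytest parametrizes
# over every stype, so the body above runs once per type. The decorator, the
# fixture list, and the frame_integrity_check import are assumptions, not
# taken from the snippet itself.
import pytest
import datatable as dt
from datatable.internal import frame_integrity_check

@pytest.mark.parametrize("st", list(dt.stype))
def test_single_element_all_stypes(st):
    ...  # body as above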
def extract_time_range(cubes, start, end):
    """Extract data that falls within the given time range."""
    time_ranged_cubes = []
    iris.util.unify_time_units(cubes)
    time_unit = cubes[0].coord('time').units.name
    dd_start = dd(start.year, start.month, start.day, 0, 0, 0)
    t_1 = cf_units.date2num(dd_start, time_unit, cf_units.CALENDAR_STANDARD)
    dd_end = dd(end.year, end.month, end.day, 0, 0, 0)
    t_2 = cf_units.date2num(dd_end, time_unit, cf_units.CALENDAR_STANDARD)
    for cube in cubes:
        time_constraint = iris.Constraint(time=lambda t: (
            t_1 <= datetime_to_int_days(t.point, time_unit) <= t_2))
        cube_slice = cube.extract(time_constraint)
        time_ranged_cubes.append(cube_slice)
    return time_ranged_cubes
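# For orientation, a hedged, self-contained example of the cf_units call used
# above; the unit string here is illustrative, not taken from the snippet.
import cf_units
from datetime import datetime

t = cf_units.date2num(datetime(1950, 1, 11), 'days since 1950-01-01',
                      cf_units.CALENDAR_STANDARD)
print(t)  # 10.0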
def datetime_to_int_days(date_obj, tunit):
    """Return time point converted from cube datetime cell."""
    if float(iris.__version__.split('.')[0]) >= 2.0:
        time_offset = get_time_offset(tunit)
        real_date = dd(date_obj.year, date_obj.month, date_obj.day, 0, 0, 0)
        days = (real_date - time_offset).days
    else:
        days = date_obj
    return days
def sBar(filename, user, title='title'):
    from jv3.study.ca_plot import make_filename
    numLines = lambda txt: len(txt.splitlines()) - txt.splitlines().count('')
    aveSize = lambda a, b: int(float(a)/float(b)) if b != 0 else 0  ## a=quantity of something per how many b elts; if no b, return 0
    wksToIndex = lambda rowWeek, colWeek: rowWeek + colWeek*COL_SEGMENTS
    msecToDate = lambda msec: datetime.datetime.fromtimestamp(float(msec)/1000.0)
    DAY_IN_MS = 1000*60*60*24
    COL_SEGMENTS, ROW_GROUPS, GROUP_TYPES = 7, 7, 5  ## add, edit, edit, del, del
    notes = user.note_owner.all()
    allLogs = ActivityLog.objects.filter(owner=user,
                                         action__in=['note-add', 'note-save', 'note-delete'])
    data = r.matrix(0, nrow=COL_SEGMENTS, ncol=ROW_GROUPS*GROUP_TYPES)
    nOldEdit, nNewEdit = [[] for n in range(ROW_GROUPS)], [[] for n in range(ROW_GROUPS)]
    createdSize = [[0, 0] for i in range(ROW_GROUPS)]  ## [# notes, # lines] for each day of week
    editedSize = [[0, 0] for i in range(ROW_GROUPS)]
    noteD = {}
    for log in allLogs:
        noteArr = notes.filter(jid=log.noteid)
        if len(noteArr) < 1:  ## only process logs for which we still
            continue          ## have the note (deleted or not)
        note = noteArr[0]
        actDate, birthDate = msecToDate(log.when), msecToDate(note.created)
        actDay, birthDay = actDate.weekday(), birthDate.weekday()
        firstRecord, lastTime = min(actDate, birthDate), max(actDate, birthDate)
        startOfDay = dd(lastTime.year, lastTime.month, lastTime.day)
        actTD = firstRecord - startOfDay
        actInPastWk = math.fabs(actTD.days) <= 6  ## Both .created and .when happened within (current day + 6 previous days)
        if log.action == 'note-add':  ## Record Add
            data[wksToIndex(birthDay, actDay*GROUP_TYPES + 0)] += 1
            if ((log.noteText is not None) and (log.noteText.count('') > 1)
                    and (log.noteText.count('\n') < 100)):
                createdSize[actDay][0] += 1  ## Increment: ave note size
                ##increment = 0 if note.deleted else 50
                ##increment = numLines(log.noteText) if log and log.noteText and (numLines(log.noteText) < 1000) else 0
                increment = log.noteText.count('') - 1  ##note.version*10
                createdSize[actDay][1] += int(increment)
        elif log.action == 'note-save':  ## Record Save: split (edit on day of note.created vs not)
            addVal = 1 if actInPastWk else 2
            if (actInPastWk and log.noteid in nNewEdit[actDay]) or \
               (not actInPastWk and log.noteid in nOldEdit[actDay]):
                continue  ## We've already recorded this note for its time-frame
            data[wksToIndex(birthDay, actDay*GROUP_TYPES + addVal)] += 1
            nNewEdit[actDay].append(log.noteid) if actInPastWk else nOldEdit[actDay].append(log.noteid)  ## Register log
        elif log.action == 'note-delete':  ## Record Death
            addVal = 3 if actInPastWk else 4
            data[wksToIndex(birthDay, actDay*GROUP_TYPES + addVal)] += 1
    r.png(file=make_filename(filename), w=1000, h=500)
    dayNames = ["Mon", "Tues", "Wed", "Thur", "Fri", "Sat", "Sun"]
    colors = r.c("red", 'orange', 'yellow', 'green', 'blue', 'grey', 'brown')
    title = ("#Notes:#Logs:Email:ID -- " + str(notes.count()) + ":" +
             str(allLogs.count()) + ":" + user.email + ":" + str(user.id))
    aveWidth = int(float(sum([elt[1] for elt in createdSize])) /
                   float(sum([elt[0] for elt in createdSize])))
    widths = []
    for elt in createdSize:
        widths.extend([aveSize(elt[1], elt[0]), aveWidth, aveWidth, aveWidth, aveWidth])
    axisNames = []
    for i in range(ROW_GROUPS):
        axisNames.extend([str(widths[i*5]), "", "", str(dayNames[i]), ""])
    r.barplot(data, main=title, ylab='# Action Logs', beside=False, col=colors,
              space=r.c(3, 1, 0.1, 1, .1), names=axisNames,
              width=r.c(widths))  # was bare c(widths); the rpy r proxy is needed
    r.dev_off()  # was devoff(); rpy exposes R's dev.off() as r.dev_off()
def get_date(str1, str2):
    year = int(str1[0:4])
    month = str2.split()[0]
    month = int(month[:-1])
    week = str2.split()[1]
    week = int(week[:-1])
    s = f'{year:04d}-{month:02d}-01'
    first_day = dd.strptime(s, '%Y-%m-%d')
    first_week = first_day.weekday()
    if first_week == 0:
        day = week * 7 - 6
    else:
        day = week * 7 - first_week + 1
    return dd(year, month, day)
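# A hedged usage sketch: the exact input format is an assumption. str1 is read
# only for its first four characters (the year), and str2 for two tokens whose
# trailing unit character is stripped, e.g. '5月 2周' for month 5, week 2.
# Here dd must be datetime.datetime (it needs strptime).
from datetime import datetime as dd

print(get_date('2021', '5月 2周'))  # 2021-05-10 00:00:00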
    for cnt, row in train_data.iterrows():
        hood, act = som1.somfwd(row.values)
        data['hood_' + signal].ix[cnt] = hood  # .ix is pandas-era code; .loc is the modern equivalent

    ### now take today's data, walk through, and place each row in a neighborhood
    today['hood_' + signal] = None
    for cnt, row in today[use_cols].iterrows():
        hood, act = som1.somfwd(row.values)
        today['hood_' + signal].ix[cnt] = hood

    #predict_cols = [x for x in data.columns if x.endswith('fret')]
    #for ret_col in predict_cols:
    ret_col = signal + '.fret'
    hood_ret = data.groupby('hood_' + signal)[ret_col].median()
    hood_ret = hood_ret.to_dict()
    ### map the return back to securities
    today['pred_' + ret_col] = today['hood_' + signal].apply(hood_ret.get)
    today.to_csv(MODEL_NAME + '/forecast_{:%Y%m%d}.csv'.format(date))


if __name__ == '__main__':
    from datetime import datetime as dd
    for d in pd.date_range(dd(2012, 1, 1), dd(2016, 12, 31)):
        if d.weekday() == 4:  # Fridays only
            #run(dd(2010,1,29))
            if COUNTRY_STUDY:
                run(d, only_country=COUNTRY_STUDY)
            else:
                run(d)
from datetime import date as dd

halloween = dd(2014, 10, 31)
print(halloween)  # 2014-10-31
    today['hood_std'] = None
    for cnt, row in today[use_cols].iterrows():
        hood, act, csize, czscore, cavg, cstd = som1.somfwd(row.values,
                                                            clusterSizeFlag=True)
        today['hood'].ix[cnt] = hood
        today['hood_ct'].ix[cnt] = csize
        today['hood_z'].ix[cnt] = czscore
        today['hood_avg'].ix[cnt] = cavg
        today['hood_std'].ix[cnt] = cstd

    if DEBUG:
        print('group')
    for ret_col in predict_cols:
        hood_ret = data.groupby('hood')[ret_col].median()
        hood_ret = hood_ret.to_dict()
        ### map the return back to securities
        today['pred_' + ret_col] = today['hood'].apply(hood_ret.get)
    today.to_csv(MODEL_NAME + '/forecast_{:%Y%m%d}.csv'.format(date))
    if DEBUG:
        print('done')


if __name__ == '__main__':
    from datetime import datetime as dd
    if DEBUG:
        run(dd(2010, 1, 31))
    else:
        for d in pd.date_range(dd(2008, 1, 1), dd(2011, 12, 31), freq='M'):
            run(d)
        # inside parse_mcap(date): scan rows between the two section markers
        if '1stSection' in row:
            started = True
        elif not ended:
            if '2ndSection' in row:
                ended = True
            else:
                # started but not yet ended, we keep data
                cols = []
                tokens = row.split(' ')
                cols.append(tokens[0])
                cols.append(tokens[1])
                cols.append(" ".join(tokens[2:]))
                output.append(cols)

    col_names = ['company_ct', 'mcap', 'sector']
    data = pandas.DataFrame(output, columns=col_names)
    data['date'] = date.strftime('%Y%m%d')
    data.to_csv('mcap.csv/{:%Y%m}.csv'.format(date), index=False, header=False)
    return


#for dt in pandas.DateRange(datetime.datetime(2014,6,1), datetime.datetime(2014,6,2)):
for y in range(2010, 2016):
    end = 13
    if y == 2015:
        end = 7  # 2015 data only runs through June
    for m in range(1, end):
        parse_mcap(dd(y, m, 1))
    ### now take today's data, walk through, and place each row in a neighborhood
    today['hood'] = None
    for cnt, row in today[use_cols].iterrows():
        hood, act = som1.somfwd(row.values)
        today['hood'].ix[cnt] = hood

    if DEBUG:
        print('group')
    for ret_col in predict_cols:
        hood_ret = data.groupby('hood')[ret_col].median()
        hood_ret = hood_ret.to_dict()
        ### map the return back to securities
        today['pred_' + ret_col] = today['hood'].apply(hood_ret.get)
    today.to_csv(MODEL_NAME + '/forecast_{:%Y%m%d}.csv'.format(date))
    if DEBUG:
        print('done')


if __name__ == '__main__':
    from datetime import datetime as dd
    run(dd(2010, 1, 29))
    '''
    #for d in pd.date_range(dd(2008,1,1),dd(2011,12,31)):
        run(d)
        if d.weekday() == 4:
            #run(dd(2010,1,29))
            run(d)
    '''
def downloadYear(year):
    # pd.DateRange/pd.datetools were removed from pandas long ago;
    # bdate_range yields the same business-day sequence
    for date in pd.bdate_range(dd(year, 1, 2), dd(year, 12, 31)):
        downloadBorsa(date)
def downloadBorsa(date):
    outdir = '/home/wzhu/download_daily/borsa.bulletin/{:%Y}'.format(date)
    if not os.path.exists(outdir):
        os.makedirs(outdir)
    print(date)
    for sess in [1, 2]:
        req2 = s.post(url, headers=headers, data=getLoad(date, sess))
        pause()  # 5 sec
        res = req2.headers.get('content-disposition')
        if pd.isnull(res):
            print("no data on {}".format(date))
            break
        elif 'filename' in res:
            filename = res.split('=')[1]
            print(filename)
            filepath = '{}/{}'.format(outdir, filename)
            with open(filepath, "wb") as f:
                f.write(req2.content)


if __name__ == '__main__':
    production = '''
    day = dd.today() - pd.datetools.BDay(1)
    print "processing {}".format(day)
    downloadBorsa(day)
    '''
    for date in pd.bdate_range(dd(2015, 7, 21), dd(2015, 7, 31)):
        downloadBorsa(date)
def dayOfYear(self, date: str) -> int:
    y, m, d = map(int, date.split('-'))
    return (dd(y, m, d) - dd(y, 1, 1)).days + 1
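# A hedged usage sketch: the signature takes self, so this presumably lives on
# a LeetCode-style class; the Solution class name is an assumption.
from datetime import date as dd

class Solution:
    def dayOfYear(self, date: str) -> int:
        y, m, d = map(int, date.split('-'))
        return (dd(y, m, d) - dd(y, 1, 1)).days + 1

print(Solution().dayOfYear('2019-02-10'))  # 41 (31 days of January + 10)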