예제 #1
0
def test_single_element_all_stypes(st):
    """Check single-cell access on a one-column Frame built with stype `st`.

    A Frame with a single column "A" is created from a sample list chosen to
    suit `st`, and every cell is then read back both by column index and by
    column name. The `time64` stype is skipped entirely.
    """
    from datetime import date as dd

    if st == dt.stype.time64:
        return

    # Python type that cells of this stype are expected to come back as.
    if st == dt.stype.bool8:
        py_type = bool
    elif st.ltype == dt.ltype.int:
        py_type = int
    elif st.ltype == dt.ltype.real:
        py_type = float
    elif st.ltype == dt.ltype.str:
        py_type = str
    elif st == dt.stype.date32:
        py_type = dd
    else:
        py_type = object

    # Sample column contents for each expected Python type.
    samples = {
        bool: [True, False, True, None],
        int: [1, 7, -99, 214, None, 3333],
        float: [2.5, 3.4e15, -7.909, None],
        str: ['Oh', 'gobbly', None, 'sproo'],
        dd: [dd(2000, 5, 5), dd(2012, 12, 12), None],
        object: [dt, st, list, None, {3, 2, 1}],
    }
    values = samples[py_type]

    frame = dt.Frame(A=values, stype=st)
    frame_integrity_check(frame)
    assert frame.names == ("A", )
    assert frame.stypes == (st, )

    for row, expected in enumerate(values):
        by_index = frame[row, 0]
        by_name = frame[row, "A"]
        assert by_index == by_name

        # Missing inputs, and every cell of a void column, must read as None.
        if expected is None or st == dt.stype.void:
            assert by_index is None
            continue

        assert isinstance(by_index, py_type)
        if st == dt.stype.int8:
            # Samples such as 214 and 3333 are compared modulo 256 for int8.
            assert (by_index - expected) % 256 == 0
        elif st == dt.stype.float32:
            # float32 is compared with a relative tolerance, not exactly.
            assert abs(1 - expected / by_index) < 1e-7
        else:
            assert by_index == expected
def extract_time_range(cubes, start, end):
    """Extract the part of every cube lying between two days (inclusive).

    The time units of `cubes` are unified in place first; the unit name of
    the first cube is then used to turn `start` and `end` (only their
    year/month/day are read) into numeric bounds.

    Returns a list holding the result of `cube.extract(...)` for each cube,
    in the same order as `cubes`.
    """
    iris.util.unify_time_units(cubes)
    time_unit = cubes[0].coord('time').units.name

    def _midnight_as_number(day):
        # Drop any time-of-day part, then convert to a number in `time_unit`.
        midnight = dd(day.year, day.month, day.day, 0, 0, 0)
        return cf_units.date2num(midnight, time_unit,
                                 cf_units.CALENDAR_STANDARD)

    t_1 = _midnight_as_number(start)
    t_2 = _midnight_as_number(end)

    def _within_bounds(cell):
        return t_1 <= datetime_to_int_days(cell.point, time_unit) <= t_2

    # The constraint depends only on the bounds, so one instance serves
    # every cube.
    time_constraint = iris.Constraint(time=_within_bounds)
    return [cube.extract(time_constraint) for cube in cubes]
def datetime_to_int_days(date_obj, tunit):
    """Return time point converted from cube datetime cell.

    When the major version of iris is 2 or higher, the result is the whole
    number of days between `get_time_offset(tunit)` and midnight of
    `date_obj`'s day. For older versions `date_obj` is returned unchanged.
    """
    iris_major = float(iris.__version__.split('.')[0])
    if iris_major < 2.0:
        return date_obj

    time_offset = get_time_offset(tunit)
    midnight = dd(date_obj.year, date_obj.month, date_obj.day, 0, 0, 0)
    return (midnight - time_offset).days
예제 #4
0
def sBar(filename, user, title='title'):
    """Render a stacked bar chart of a user's note activity to a PNG.

    Every 'note-add', 'note-save' and 'note-delete' ActivityLog entry owned
    by `user` whose note is still present in `user.note_owner` is tallied
    into a 7 x 35 R matrix. The cell is addressed by the flat index
    `birth weekday + (action weekday * 5 + action type) * 7`, where the
    action type is 0 = add, 1/2 = save (recent/older), 3/4 = delete
    (recent/older). The matrix is drawn with R's `barplot` on a 1000x500
    `png` device.

    Args:
        filename: Passed to `make_filename` to build the output PNG path.
        user: Owner of the notes and logs; its `note_owner`, `email` and
            `id` attributes are read.
        title: Ignored -- the chart title is always rebuilt from the
            note/log counts. Kept so that existing callers keep working.
    """
    from jv3.study.ca_plot import make_filename

    COL_SEGMENTS, ROW_GROUPS, GROUP_TYPES = 7, 7, 5  ## add, edit,edit, del,del

    def aveSize(a, b):
        ## a=quantity of something per how many b elts, if no b, return 0
        return int(float(a) / float(b)) if b != 0 else 0

    def wksToIndex(rowWeek, colWeek):
        return rowWeek + colWeek * COL_SEGMENTS

    def msecToDate(msec):
        return datetime.datetime.fromtimestamp(float(msec) / 1000.0)

    notes = user.note_owner.all()
    allLogs = ActivityLog.objects.filter(
        owner=user, action__in=['note-add', 'note-save', 'note-delete'])
    data = r.matrix(0, nrow=COL_SEGMENTS, ncol=ROW_GROUPS * GROUP_TYPES)
    ## Note ids already counted as edited, bucketed by weekday of the edit
    nOldEdit = [[] for _ in range(ROW_GROUPS)]
    nNewEdit = [[] for _ in range(ROW_GROUPS)]
    ## [# notes, # characters] for each day of week
    createdSize = [[0, 0] for _ in range(ROW_GROUPS)]

    for log in allLogs:
        noteArr = notes.filter(jid=log.noteid)
        if len(noteArr) < 1:  ## Processing logs for which we still
            continue          ## have the note (deleted or not)
        note = noteArr[0]
        actDate, birthDate = msecToDate(log.when), msecToDate(note.created)
        actDay, birthDay = actDate.weekday(), birthDate.weekday()
        firstRecord, lastTime = min(actDate, birthDate), max(actDate, birthDate)
        startOfDay = dd(lastTime.year, lastTime.month, lastTime.day)
        actTD = firstRecord - startOfDay
        ## Both .created and .when happened within (current day + 6 previous days)
        actInPastWk = math.fabs(actTD.days) <= 6

        if log.action == 'note-add':  ## Record Add
            data[wksToIndex(birthDay, actDay * GROUP_TYPES + 0)] += 1
            text = log.noteText
            ## Only non-empty texts with fewer than 100 newlines are sized
            if text is not None and len(text) > 0 and text.count('\n') < 100:
                createdSize[actDay][0] += 1
                createdSize[actDay][1] += len(text)
        elif log.action == 'note-save':  ## Record Save: split recent vs older
            addVal = 1 if actInPastWk else 2
            seen = nNewEdit[actDay] if actInPastWk else nOldEdit[actDay]
            if log.noteid in seen:
                continue  ## We've already recorded this note for its time-frame
            data[wksToIndex(birthDay, actDay * GROUP_TYPES + addVal)] += 1
            seen.append(log.noteid)  ## Register Log
        elif log.action == 'note-delete':  ## Record Death
            addVal = 3 if actInPastWk else 4
            data[wksToIndex(birthDay, actDay * GROUP_TYPES + addVal)] += 1

    r.png(file=make_filename(filename), w=1000, h=500)
    dayNames = ["Mon", "Tues", "Wed", "Thur", "Fri", "Sat", "Sun"]
    colors = r.c("red", 'orange', 'yellow', 'green', 'blue', 'grey', 'brown')
    title = ("#Notes:#Logs:Email:ID -- " + str(notes.count()) + ":" +
             str(allLogs.count()) + ":" + user.email + ":" + str(user.id))
    ## aveSize guards the division: when no created note was sized the
    ## total count is 0 and a bare division would raise ZeroDivisionError.
    aveWidth = aveSize(sum(elt[1] for elt in createdSize),
                       sum(elt[0] for elt in createdSize))
    ## Five bars per weekday: the first uses that day's own average size,
    ## the other four use the overall average.
    widths = []
    for elt in createdSize:
        widths.extend([aveSize(elt[1], elt[0]),
                       aveWidth, aveWidth, aveWidth, aveWidth])
    axisNames = []
    for i in range(ROW_GROUPS):
        axisNames.extend([str(widths[i * GROUP_TYPES]), "", "",
                          str(dayNames[i]), ""])
    ## NOTE(review): `c` and `devoff` are not defined in this function --
    ## presumably module-level aliases for R's c() and dev.off(); confirm.
    r.barplot(data, main=title, ylab='# Action Logs', beside=False,
              col=colors, space=r.c(3, 1, 0.1, 1, .1), names=axisNames,
              width=c(widths))
    devoff()
예제 #5
0
파일: dump.py 프로젝트: formarx/bookmark
def get_date(str1, str2):
    """Return the datetime of the `week`-th Monday of a month.

    Args:
        str1: Text whose first four characters are the year.
        str2: Whitespace-separated text; the first token is the month and the
            second token the week number, each followed by exactly one
            trailing character that is dropped before conversion to int.

    Raises:
        ValueError: If a number cannot be parsed or the computed day does
            not exist in that month.
        IndexError: If `str2` holds fewer than two tokens.
    """
    year = int(str1[:4])
    tokens = str2.split()
    month = int(tokens[0][:-1])
    week = int(tokens[1][:-1])

    # Weekday of the 1st of the month (0 == Monday).
    first_weekday = dd(year, month, 1).weekday()
    # Days from the 1st to the first Monday; zero when the 1st is a Monday.
    days_to_monday = (7 - first_weekday) % 7
    day = (week - 1) * 7 + 1 + days_to_monday

    return dd(year, month, day)
예제 #6
0
        for cnt, row in train_data.iterrows():
            hood, act = som1.somfwd(row.values)
            data['hood_'+signal].ix[cnt] = hood
        ### now take todays data, walk through, and place each row in a neighborhood
        today['hood_'+signal] = None
        for cnt, row in today[use_cols].iterrows():
            hood, act = som1.somfwd(row.values)
            today['hood_'+signal].ix[cnt] = hood

        #predict_cols = [x for x in data.columns if x.endswith('fret')]
        #for ret_col in predict_cols:
        ret_col = signal + '.fret'
        hood_ret = data.groupby('hood_'+signal)[ret_col].median()
        hood_ret = hood_ret.to_dict()

        ### map the return back to securities
        today['pred_'+ret_col] = today['hood_'+signal].apply(hood_ret.get)

    today.to_csv(MODEL_NAME+'/forecast_{:%Y%m%d}.csv'.format(date))


if __name__ == '__main__':
    from datetime import datetime as dd

    # Run once for every date in 2012-01-01 .. 2016-12-31 whose weekday()
    # is 4 (Friday); restrict to one country when COUNTRY_STUDY is set.
    for d in pd.date_range(dd(2012, 1, 1), dd(2016, 12, 31)):
        if d.weekday() != 4:
            continue
        if not COUNTRY_STUDY:
            run(d)
        else:
            run(d, only_country=COUNTRY_STUDY)
예제 #7
0
from datetime import date as dd

# Build the date 31 October 2014 and print it.
halloween = dd(year=2014, month=10, day=31)
print(halloween)
예제 #8
0
        today['hood_std'] = None
        for cnt, row in today[use_cols].iterrows():
            hood, act, csize, czscore, cavg, cstd = som1.somfwd(row.values, clusterSizeFlag=True)
            today['hood'].ix[cnt] = hood
            today['hood_ct'].ix[cnt] = csize
            today['hood_z'].ix[cnt] = czscore
            today['hood_avg'].ix[cnt] = cavg
            today['hood_std'].ix[cnt] = cstd


    if DEBUG: print 'group'

    for ret_col in predict_cols:
        hood_ret = data.groupby('hood')[ret_col].median()
        hood_ret = hood_ret.to_dict()

        ### map the return back to securities
        today['pred_'+ret_col] = today['hood'].apply(hood_ret.get)

    today.to_csv(MODEL_NAME+'/forecast_{:%Y%m%d}.csv'.format(date))

    if DEBUG: print 'done'

if __name__ == '__main__':
    from datetime import datetime as dd

    if not DEBUG:
        # Normal mode: one run per date generated with freq='M' over
        # 2008-01-01 .. 2011-12-31.
        for d in pd.date_range(dd(2008, 1, 1), dd(2011, 12, 31), freq='M'):
            run(d)
    else:
        # Debug mode: a single fixed date.
        run(dd(2010, 1, 31))
예제 #9
0
            if '1stSection' in row:
                started = True
        elif not ended:
            if '2ndSection' in row:
                ended = True
            else:  #started but not yet ended, we keep data
                cols = []
                tokens = row.split(' ')
                cols.append(tokens[0])
                cols.append(tokens[1])
                cols.append(" ".join(tokens[2:]))
                output.append(cols)

    col_names = ['company_ct', 'mcap', 'sector']

    data = pandas.DataFrame(output, columns=col_names)

    data['date'] = date.strftime('%Y%m%d')

    data.to_csv('mcap.csv/{:%Y%m}.csv'.format(date), index=False, header=False)
    return


#for dt in pandas.DateRange(datetime.datetime(2014,6,1), datetime.datetime(2014,6,2)):
# Call parse_mcap for the first day of every month from 2010-01 through
# 2015-06: 2015 stops after June, every other year covers all twelve months.
for y in range(2010, 2016):
    end = 7 if y == 2015 else 13
    for m in range(1, end):
        parse_mcap(dd(y, m, 1))
예제 #10
0
    ### now take todays data, walk through, and place each row in a neighborhood
    today['hood'] = None
    for cnt, row in today[use_cols].iterrows():
        hood, act = som1.somfwd(row.values)
        today['hood'].ix[cnt] = hood

    if DEBUG: print 'group'

    for ret_col in predict_cols:
        hood_ret = data.groupby('hood')[ret_col].median()
        hood_ret = hood_ret.to_dict()

        ### map the return back to securities
        today['pred_' + ret_col] = today['hood'].apply(hood_ret.get)

    today.to_csv(MODEL_NAME + '/forecast_{:%Y%m%d}.csv'.format(date))

    if DEBUG: print 'done'


if __name__ == '__main__':
    from datetime import datetime as dd
    # Single hard-coded run for 2010-01-29.
    run(dd(2010, 1, 29))
    # The bare string literal below is never executed: it only holds a
    # disabled date-range loop (2008-2011, weekday() == 4) for reference.
    '''
    #for d in pd.date_range(dd(2008,1,1),dd(2011,12,31)):
        run(d)
        if d.weekday() == 4:
            #run(dd(2010,1,29))
            run(d)
    '''
예제 #11
0
def sBar(filename, user, title='title'):
    """Render a stacked bar chart of a user's note activity to a PNG.

    Every 'note-add', 'note-save' and 'note-delete' ActivityLog entry owned
    by `user` whose note is still present in `user.note_owner` is tallied
    into a 7 x 35 R matrix. The cell is addressed by the flat index
    `birth weekday + (action weekday * 5 + action type) * 7`, where the
    action type is 0 = add, 1/2 = save (recent/older), 3/4 = delete
    (recent/older). The matrix is drawn with R's `barplot` on a 1000x500
    `png` device.

    Args:
        filename: Passed to `make_filename` to build the output PNG path.
        user: Owner of the notes and logs; its `note_owner`, `email` and
            `id` attributes are read.
        title: Ignored -- the chart title is always rebuilt from the
            note/log counts. Kept so that existing callers keep working.
    """
    from jv3.study.ca_plot import make_filename

    COL_SEGMENTS, ROW_GROUPS, GROUP_TYPES = 7, 7, 5  ## add, edit,edit, del,del

    def aveSize(a, b):
        ## a=quantity of something per how many b elts, if no b, return 0
        return int(float(a) / float(b)) if b != 0 else 0

    def wksToIndex(rowWeek, colWeek):
        return rowWeek + colWeek * COL_SEGMENTS

    def msecToDate(msec):
        return datetime.datetime.fromtimestamp(float(msec) / 1000.0)

    notes = user.note_owner.all()
    allLogs = ActivityLog.objects.filter(
        owner=user, action__in=['note-add', 'note-save', 'note-delete'])
    data = r.matrix(0, nrow=COL_SEGMENTS, ncol=ROW_GROUPS * GROUP_TYPES)
    ## Note ids already counted as edited, bucketed by weekday of the edit
    nOldEdit = [[] for _ in range(ROW_GROUPS)]
    nNewEdit = [[] for _ in range(ROW_GROUPS)]
    ## [# notes, # characters] for each day of week
    createdSize = [[0, 0] for _ in range(ROW_GROUPS)]

    for log in allLogs:
        noteArr = notes.filter(jid=log.noteid)
        if len(noteArr) < 1:  ## Processing logs for which we still
            continue  ## have the note (deleted or not)
        note = noteArr[0]
        actDate, birthDate = msecToDate(log.when), msecToDate(note.created)
        actDay, birthDay = actDate.weekday(), birthDate.weekday()
        firstRecord, lastTime = min(actDate, birthDate), max(actDate, birthDate)
        startOfDay = dd(lastTime.year, lastTime.month, lastTime.day)
        actTD = firstRecord - startOfDay
        ## Both .created and .when happened within (current day + 6 previous days)
        actInPastWk = math.fabs(actTD.days) <= 6

        if log.action == 'note-add':  ## Record Add
            data[wksToIndex(birthDay, actDay * GROUP_TYPES + 0)] += 1
            text = log.noteText
            ## Only non-empty texts with fewer than 100 newlines are sized
            if text is not None and len(text) > 0 and text.count('\n') < 100:
                createdSize[actDay][0] += 1
                createdSize[actDay][1] += len(text)
        elif log.action == 'note-save':  ## Record Save: split recent vs older
            addVal = 1 if actInPastWk else 2
            seen = nNewEdit[actDay] if actInPastWk else nOldEdit[actDay]
            if log.noteid in seen:
                continue  ## We've already recorded this note for its time-frame
            data[wksToIndex(birthDay, actDay * GROUP_TYPES + addVal)] += 1
            seen.append(log.noteid)  ## Register Log
        elif log.action == 'note-delete':  ## Record Death
            addVal = 3 if actInPastWk else 4
            data[wksToIndex(birthDay, actDay * GROUP_TYPES + addVal)] += 1

    r.png(file=make_filename(filename), w=1000, h=500)
    dayNames = ["Mon", "Tues", "Wed", "Thur", "Fri", "Sat", "Sun"]
    colors = r.c("red", 'orange', 'yellow', 'green', 'blue', 'grey', 'brown')
    title = ("#Notes:#Logs:Email:ID -- " + str(notes.count()) + ":" +
             str(allLogs.count()) + ":" + user.email + ":" + str(user.id))
    ## aveSize guards the division: when no created note was sized the
    ## total count is 0 and a bare division would raise ZeroDivisionError.
    aveWidth = aveSize(sum(elt[1] for elt in createdSize),
                       sum(elt[0] for elt in createdSize))
    ## Five bars per weekday: the first uses that day's own average size,
    ## the other four use the overall average.
    widths = []
    for elt in createdSize:
        widths.extend([aveSize(elt[1], elt[0]),
                       aveWidth, aveWidth, aveWidth, aveWidth])
    axisNames = []
    for i in range(ROW_GROUPS):
        axisNames.extend([str(widths[i * GROUP_TYPES]), "", "",
                          str(dayNames[i]), ""])
    ## NOTE(review): `c` and `devoff` are not defined in this function --
    ## presumably module-level aliases for R's c() and dev.off(); confirm.
    r.barplot(data,
              main=title,
              ylab='# Action Logs',
              beside=False,
              col=colors,
              space=r.c(3, 1, 0.1, 1, .1),
              names=axisNames,
              width=c(widths))
    devoff()
예제 #12
0
def downloadYear(year):
    """Call downloadBorsa for every date of `year` in the generated range.

    The range runs from 2 January to 31 December of `year` and is built by
    `pd.DateRange` with `pd.datetools.BDay` as its third argument.
    """
    dates = pd.DateRange(dd(year, 1, 2), dd(year, 12, 31), pd.datetools.BDay)
    for date in dates:
        downloadBorsa(date)
예제 #13
0
def downloadBorsa(date):
    """Download the bulletin files for `date`, one request per session.

    Files are written below
    '/home/wzhu/download_daily/borsa.bulletin/<year>', which is created if
    it does not exist. For sessions 1 and 2 a POST is issued; when the
    response has no 'content-disposition' header the date is reported as
    having no data and the remaining session is skipped. When the header
    mentions a filename, the response body is saved under that name.

    Args:
        date: Object supporting the '{:%Y}' format spec (e.g. a datetime).

    NOTE(review): relies on `s`, `url`, `headers`, `getLoad` and `pause`
    from module scope -- confirm they are defined before this is called.
    """
    outdir = '/home/wzhu/download_daily/borsa.bulletin/{:%Y}'.format(date)
    if not os.path.exists(outdir):
        os.makedirs(outdir)
    # Single-argument print(...) behaves the same on Python 2 and 3.
    print(date)

    for sess in [1, 2]:
        req2 = s.post(url, headers=headers, data=getLoad(date, sess))
        pause()  # 5 sec
        res = req2.headers.get('content-disposition')
        if pd.isnull(res):
            print("no data on {}".format(date))
            break
        elif 'filename' in res:
            # The name comes from a server-controlled header: keep only its
            # final path component so it cannot escape `outdir`.
            filename = os.path.basename(res.split('=')[1])
            print(filename)
            filepath = '{}/{}'.format(outdir, filename)
            with open(filepath, "wb") as f:
                f.write(req2.content)


if __name__ == '__main__':
    # `production` is only a string: it keeps the previous-business-day
    # variant of this script for reference and is never executed.
    production = '''
    day = dd.today() - pd.datetools.BDay(1)
    print "processing {}".format(day)
    downloadBorsa(day)
    '''
    # Back-fill 2015-07-21 .. 2015-07-31. The loop body is indented with
    # spaces (it used a tab), so its nesting no longer depends on tab width.
    for date in pd.DateRange(dd(2015,7,21), dd(2015,7,31), pd.datetools.BDay ):
        downloadBorsa(date)

예제 #14
0
 def dayOfYear(self, date: str) -> int:
     """Return the 1-based position of `date` within its year.

     `date` is a '-'-separated "year-month-day" string of integers.
     """
     year, month, day = (int(part) for part in date.split('-'))
     return dd(year, month, day).timetuple().tm_yday