def tslice(unit, start=None, end=None, step=1, count=None):
    '''tslice(unit,start=None,end=None,step=1,count=None) -> generator of Sandglass object

    An xrange-like generator over points in time: starting at ``start`` it
    yields the current value and then moves it by ``step`` units of ``unit``
    until ``end`` is reached (``end`` itself is excluded) or ``count`` values
    have been produced.

    unit  -- must be a member of ``Sandglass._units``; per the original
             documentation one of ['year', 'month', 'day', 'hour', 'minute',
             'second', 'microsecond'].
    start -- first value to yield. Strings are converted with ``ben()``; a
             falsy value falls back to ``ben()`` with no arguments
             (presumably "now" -- confirm against ``ben``).
    end   -- exclusive bound. Strings are converted with ``ben()``; a falsy
             value falls back to ``ben(datetime.max)`` for a positive step
             and ``ben(datetime.min)`` for a negative step.
    step  -- number of units to move per iteration. A step of 0 yields
             nothing.
    count -- maximum number of values to yield; ``None`` means no limit and
             0 yields nothing.

    Raises AttributeError if ``unit`` is not a known unit. Because this is a
    generator, the check only runs once iteration starts.
    '''
    if unit not in Sandglass._units:
        raise AttributeError('unknown unit: {0!r}'.format(unit))
    if isinstance(start, basestring):
        start = ben(start)
    if isinstance(end, basestring):
        end = ben(end)
    start = start or ben()
    # Only None means "unlimited"; `count or inf` used to turn an explicit
    # count=0 into an endless sequence.
    limit = float('inf') if count is None else count

    cur = start
    cnt = 0
    if step > 0:
        end = end or ben(datetime.max)
        while cur < end and cnt < limit:
            yield cur
            cur = cur.shifted(**{unit: step})
            cnt += 1
    elif step < 0:
        end = end or ben(datetime.min)
        while cur > end and cnt < limit:
            yield cur
            cur = cur.shifted(**{unit: step})
            cnt += 1
def tslice(unit, start=None, end=None, step=1, count=None):
    '''tslice(unit,start=None,end=None,step=1,count=None) -> generator of Sandglass object

    An xrange-like generator over points in time: starting at ``start`` it
    yields the current value and then moves it by ``step`` units of ``unit``
    until ``end`` is reached (``end`` itself is excluded) or ``count`` values
    have been produced.

    unit  -- must be a member of ``Sandglass._units``; per the original
             documentation one of ['year', 'month', 'day', 'hour', 'minute',
             'second', 'microsecond'].
    start -- first value to yield. Strings are converted with ``ben()``; a
             falsy value falls back to ``ben()`` with no arguments
             (presumably "now" -- confirm against ``ben``).
    end   -- exclusive bound. Strings are converted with ``ben()``; a falsy
             value falls back to ``ben(datetime.max)`` for a positive step
             and ``ben(datetime.min)`` for a negative step.
    step  -- number of units to move per iteration. A step of 0 yields
             nothing.
    count -- maximum number of values to yield; ``None`` means no limit and
             0 yields nothing.

    Raises AttributeError if ``unit`` is not a known unit. Because this is a
    generator, the check only runs once iteration starts.
    '''
    if unit not in Sandglass._units:
        raise AttributeError('unknown unit: {0!r}'.format(unit))
    if isinstance(start, basestring):
        start = ben(start)
    if isinstance(end, basestring):
        end = ben(end)
    start = start or ben()
    # Only None means "unlimited"; `count or inf` used to turn an explicit
    # count=0 into an endless sequence.
    limit = float('inf') if count is None else count

    # Keyword arguments for every shifted() call, e.g. {'day': 1}.
    advance = {unit: step}
    cur = start
    cnt = 0
    if step > 0:
        end = end or ben(datetime.max)
        while cur < end and cnt < limit:
            yield cur
            cur = cur.shifted(**advance)
            cnt += 1
    elif step < 0:
        end = end or ben(datetime.min)
        while cur > end and cnt < limit:
            yield cur
            cur = cur.shifted(**advance)
            cnt += 1
def __init__(self, expr, base=None):
    '''Parse a five-field schedule expression and set up the iteration state.

    expr -- five whitespace-separated fields. Each field is a comma-separated
            list of items; an item is a single integer, ``*``, ``a-b`` or
            ``a-b/step`` (``*`` is shorthand for the field's full range, so
            ``*/step`` works too). When ``a > b`` the range wraps around the
            end of the field's range. Field 2 is treated as the day field and
            field 4 as the isoweekday field.
            NOTE(review): this looks like crontab syntax; the actual field
            order comes from ``self.field_range`` -- confirm there.
    base -- reference time, passed through ``ben()``; a falsy value uses
            ``ben()`` with no arguments.

    Raises Exception('invalid expr') when the field count is not 5 or a range
    bound lies outside the field's allowed range. Non-numeric items or steps
    and a step of 0 raise ValueError from ``int()`` / ``range()``.
    NOTE(review): single integer items are not range-checked.
    '''
    self.base = ben(base) if base else ben()
    if self.base.second or self.base.microsecond:
        # Minute is the smallest unit: shift by one minute and floor to the
        # minute, discarding seconds and microseconds.
        self.base = self.base.shift(minute=1).floor('minute')
    self.cur = self.base.clone()
    self.entry = Entry()
    self.last_fit_year = self.cur.year
    self.range_len = {
        'month': 12,
        'day': getattr(self.cur, 'days_in_month'),
        'isoweekday': 7,
        'hour': 24,
        'minute': 60,
    }

    tokens = expr.strip().split()
    if len(tokens) != 5:
        raise Exception('invalid expr')
    self.has_day = (tokens[2] != '*')
    self.has_isoweekday = (tokens[4] != '*')

    for i, tok in enumerate(tokens):
        # Text that replaces '*': the field's full "min-max" range.
        full_range = '{0[1]}-{0[2]}'.format(self.field_range[i])
        res = []
        for item in tok.split(','):
            mat = re.search(r'^(\d+)-(\d+)(/(.*))?$',
                            str(item).replace('*', full_range))
            if not mat:
                # Not a range: the item has to be a plain integer.
                res.append(int(item))
                continue
            start, end, _, step = mat.groups()
            start, end, step = int(start), int(end), int(step or 1)
            _range_start, _range_end = self.field_range[i][1:3]
            if not _range_start <= start <= _range_end:
                raise Exception('invalid expr')
            if not _range_start <= end <= _range_end:
                raise Exception('invalid expr')
            if start > end:
                # Wrap-around range such as 22-2: walk from start to the end
                # of the field's range, then from its beginning up to end.
                # list() keeps the slicing/concatenation valid where range()
                # does not return a list.
                _rotate = list(range(_range_start, _range_end + 1))
                _rotate = (_rotate[_rotate.index(start):] +
                           _rotate[:_rotate.index(end) + 1])
                res.extend(_rotate[::step])
            else:
                res.extend(range(start, end + 1, step))
        self.entry[i] = sorted(res)

    # Only one of day / isoweekday stays populated unless both were given
    # explicitly: a '*' isoweekday is cleared, otherwise a '*' day is cleared.
    if not self.has_isoweekday:
        self.entry.isoweekday = []
    elif not self.has_day:
        self.entry.day = []
def timediff(timestr, factor=86400, base=None):
    '''Get the distance in seconds from base to the next timestr.

    The difference ``ben(timestr) - base`` is truncated to whole seconds and
    reduced modulo ``factor``, so a target that already lies behind ``base``
    wraps around to its next occurrence (one day later with the default
    factor of 86400).

    timestr -- target time, converted with ``ben()``.
    factor  -- modulus in seconds applied to the difference.
    base    -- reference time, converted with ``ben()``; a falsy value uses
               ``ben()`` with no arguments.

    >>> timediff('20:00:00', factor=86400, base='19:30:00')
    1800
    >>> timediff('20:00:00', factor=86400, base='21:30:00')
    81000
    '''
    reference = ben(base) if base else ben()
    target = ben(timestr)
    seconds_apart = (target - reference).total_seconds()
    return int(seconds_apart) % factor
def timediff(timestr, factor=86400, base=None):
    '''Get the distance in seconds from base to the next timestr.

    Computes ``ben(timestr) - base``, truncates it to whole seconds and takes
    the result modulo ``factor``. A target that is already behind ``base``
    therefore wraps around instead of producing a negative value.

    timestr -- target time, converted with ``ben()``.
    factor  -- modulus in seconds applied to the difference (default: the
               number of seconds in a day).
    base    -- reference time, converted with ``ben()``; a falsy value uses
               ``ben()`` with no arguments.

    >>> timediff('20:00:00', factor=86400, base='19:30:00')
    1800
    >>> timediff('20:00:00', factor=86400, base='21:30:00')
    81000
    '''
    if base:
        origin = ben(base)
    else:
        origin = ben()
    delta = ben(timestr) - origin
    whole_seconds = int(delta.total_seconds())
    return whole_seconds % factor
def __init__(self, expr, base=None):
    '''Parse a five-field schedule expression and set up the iteration state.

    expr -- five whitespace-separated fields. Each field is a comma-separated
            list of items; an item is a single integer, ``*``, ``a-b`` or
            ``a-b/step`` (``*`` is shorthand for the field's full range, so
            ``*/step`` works too). When ``a > b`` the range wraps around the
            end of the field's range. Field 2 is treated as the day field and
            field 4 as the isoweekday field.
            NOTE(review): this looks like crontab syntax; the actual field
            order comes from ``self.field_range`` -- confirm there.
    base -- reference time, passed through ``ben()``; a falsy value uses
            ``ben()`` with no arguments.

    Raises Exception('invalid expr') when the field count is not 5 or a range
    bound lies outside the field's allowed range. Non-numeric items or steps
    and a step of 0 raise ValueError from ``int()`` / ``range()``.
    NOTE(review): single integer items are not range-checked.
    '''
    self.base = ben(base) if base else ben()
    if self.base.second or self.base.microsecond:
        # Minute is the smallest unit: shift by one minute and floor to the
        # minute, discarding seconds and microseconds.
        self.base = self.base.shift(minute=1).floor('minute')
    self.cur = self.base.clone()
    self.entry = Entry()
    self.last_fit_year = self.cur.year
    self.range_len = {
        'month': 12,
        'day': getattr(self.cur, 'days_in_month'),
        'isoweekday': 7,
        'hour': 24,
        'minute': 60,
    }

    tokens = expr.strip().split()
    if len(tokens) != 5:
        raise Exception('invalid expr')
    self.has_day = (tokens[2] != '*')
    self.has_isoweekday = (tokens[4] != '*')

    for i, tok in enumerate(tokens):
        # Text that replaces '*': the field's full "min-max" range.
        full_range = '{0[1]}-{0[2]}'.format(self.field_range[i])
        res = []
        for item in tok.split(','):
            mat = re.search(r'^(\d+)-(\d+)(/(.*))?$',
                            str(item).replace('*', full_range))
            if not mat:
                # Not a range: the item has to be a plain integer.
                res.append(int(item))
                continue
            start, end, _, step = mat.groups()
            start, end, step = int(start), int(end), int(step or 1)
            _range_start, _range_end = self.field_range[i][1:3]
            if not _range_start <= start <= _range_end:
                raise Exception('invalid expr')
            if not _range_start <= end <= _range_end:
                raise Exception('invalid expr')
            if start > end:
                # Wrap-around range such as 22-2: walk from start to the end
                # of the field's range, then from its beginning up to end.
                # list() keeps the slicing/concatenation valid where range()
                # does not return a list.
                _rotate = list(range(_range_start, _range_end + 1))
                _rotate = (_rotate[_rotate.index(start):] +
                           _rotate[:_rotate.index(end) + 1])
                res.extend(_rotate[::step])
            else:
                res.extend(range(start, end + 1, step))
        self.entry[i] = sorted(res)

    # Only one of day / isoweekday stays populated unless both were given
    # explicitly: a '*' isoweekday is cleared, otherwise a '*' day is cleared.
    if not self.has_isoweekday:
        self.entry.isoweekday = []
    elif not self.has_day:
        self.entry.day = []
def _fill_unique(filled, ranked, limit=5):
    '''Append not-yet-present keys of ranked (key, score) pairs to filled, stopping at limit entries.'''
    for entry in ranked:
        key = entry[0]
        if key in filled:
            continue
        if len(filled) == limit:
            break
        filled.append(key)


def gen_submission(best_hotels_search_dest, best_hotels_od_ulc,
                   popular_hotel_cluster):
    '''Write the submission CSV with up to five hotel clusters per test row.

    For every row of the test file the clusters are chosen, without
    duplicates and in this order of precedence, from:

    1. ``best_hotels_od_ulc[(user_location_city, orig_destination_distance)]``
    2. ``best_hotels_search_dest[(srch_destination_id, hotel_country,
       hotel_market)]``
    3. the five highest-scoring entries of ``popular_hotel_cluster``

    Parameters
    ----------
    best_hotels_search_dest : dict mapping the 3-tuple above to a
        ``{cluster: score}`` dict.
    best_hotels_od_ulc : dict mapping the 2-tuple above to a
        ``{cluster: score}`` dict.
    popular_hotel_cluster : ``{cluster: score}`` dict used as the fallback.

    Cluster keys are concatenated onto the output line, so they must be
    strings. Reading stops at the first line that is empty after stripping
    (normally end of file).
    '''
    print('Generate submission...')
    path = ('../../kaggle_data/hotel_recommendation/result/submission_' +
            ben().sql + '.csv')
    # Context managers close both files even if a row fails to parse; the
    # input file used to be left open.
    with open(path, "w") as out:
        with open("../../kaggle_data/hotel_recommendation/data/test.csv",
                  "r") as f:
            f.readline()  # skip the CSV header
            total = 0
            out.write("id,hotel_cluster\n")
            top_clusters = nlargest(5, sorted(popular_hotel_cluster.items()),
                                    key=itemgetter(1))

            while True:
                line = f.readline().strip()
                total += 1
                if total % 1000000 == 0:
                    print('Write {} lines...'.format(total))
                if line == '':
                    break

                arr = line.split(",")
                row_id = arr[0]
                user_location_city = arr[6]
                orig_destination_distance = arr[7]
                srch_destination_id = arr[17]
                hotel_country = arr[20]
                hotel_market = arr[21]

                filled = []

                s1 = (user_location_city, orig_destination_distance)
                if s1 in best_hotels_od_ulc:
                    d = best_hotels_od_ulc[s1]
                    # Sorted first so that score ties break by key order.
                    _fill_unique(filled,
                                 nlargest(5, sorted(d.items()),
                                          key=itemgetter(1)))

                s2 = (srch_destination_id, hotel_country, hotel_market)
                if s2 in best_hotels_search_dest:
                    d = best_hotels_search_dest[s2]
                    # NOTE(review): unlike the lookup above, the items are
                    # not sorted here, so ties follow dict iteration order.
                    _fill_unique(filled,
                                 nlargest(5, d.items(), key=itemgetter(1)))

                _fill_unique(filled, top_clusters)

                out.write(str(row_id) + ',' +
                          ''.join(' ' + cluster for cluster in filled) + "\n")
    print('Completed!')
def _fill_unique(filled, ranked, limit=5):
    '''Append not-yet-present keys of ranked (key, score) pairs to filled, stopping at limit entries.'''
    for entry in ranked:
        key = entry[0]
        if key in filled:
            continue
        if len(filled) == limit:
            break
        filled.append(key)


def gen_submission(best_hotels_search_dest, best_hotels_od_ulc,
                   popular_hotel_cluster):
    '''Write the submission CSV with up to five hotel clusters per test row.

    For every row of the test file the clusters are chosen, without
    duplicates and in this order of precedence, from:

    1. ``best_hotels_od_ulc[(user_location_city, orig_destination_distance)]``
    2. ``best_hotels_search_dest[(srch_destination_id, hotel_country,
       hotel_market)]``
    3. the five highest-scoring entries of ``popular_hotel_cluster``

    Parameters
    ----------
    best_hotels_search_dest : dict mapping the 3-tuple above to a
        ``{cluster: score}`` dict.
    best_hotels_od_ulc : dict mapping the 2-tuple above to a
        ``{cluster: score}`` dict.
    popular_hotel_cluster : ``{cluster: score}`` dict used as the fallback.

    Cluster keys are concatenated onto the output line, so they must be
    strings. Reading stops at the first line that is empty after stripping
    (normally end of file).
    '''
    print('Generate submission...')
    path = ('../../kaggle_data/hotel_recommendation/result/submission_' +
            ben().sql + '.csv')
    # Context managers close both files even if a row fails to parse; the
    # input file used to be left open.
    with open(path, "w") as out:
        with open("../../kaggle_data/hotel_recommendation/data/test.csv",
                  "r") as f:
            f.readline()  # skip the CSV header
            total = 0
            out.write("id,hotel_cluster\n")
            top_clusters = nlargest(5, sorted(popular_hotel_cluster.items()),
                                    key=itemgetter(1))

            while True:
                line = f.readline().strip()
                total += 1
                if total % 1000000 == 0:
                    print('Write {} lines...'.format(total))
                if line == '':
                    break

                arr = line.split(",")
                row_id = arr[0]
                user_location_city = arr[6]
                orig_destination_distance = arr[7]
                srch_destination_id = arr[17]
                hotel_country = arr[20]
                hotel_market = arr[21]

                filled = []

                s1 = (user_location_city, orig_destination_distance)
                if s1 in best_hotels_od_ulc:
                    d = best_hotels_od_ulc[s1]
                    # Sorted first so that score ties break by key order.
                    _fill_unique(filled,
                                 nlargest(5, sorted(d.items()),
                                          key=itemgetter(1)))

                s2 = (srch_destination_id, hotel_country, hotel_market)
                if s2 in best_hotels_search_dest:
                    d = best_hotels_search_dest[s2]
                    # NOTE(review): unlike the lookup above, the items are
                    # not sorted here, so ties follow dict iteration order.
                    _fill_unique(filled,
                                 nlargest(5, d.items(), key=itemgetter(1)))

                _fill_unique(filled, top_clusters)

                out.write(str(row_id) + ',' +
                          ''.join(' ' + cluster for cluster in filled) + "\n")
    print('Completed!')