Exemple #1
0
def tslice(unit, start=None, end=None, step=1, count=None):
    '''tslice(unit,start=None,end=None,step=1,count=None) -> generator of Sandglass object

    Yield successive time points, moving ``step`` units at a time, in an
    xrange-like fashion.

    unit: one of the names in ``Sandglass._units``; the original notes list
        'year', 'month', 'day', 'hour', 'minute', 'second', 'microsecond'.
    start: first value yielded.  A string is converted with ``ben()``; a
        falsy value means ``ben()`` called without arguments.
    end: exclusive bound.  A string is converted with ``ben()``; a falsy
        value means ``ben(datetime.max)`` for a positive step and
        ``ben(datetime.min)`` for a negative step.
    step: number of units between consecutive values.  A step of 0 yields
        nothing.
    count: maximum number of values to yield; ``None`` means no limit and
        0 yields nothing.

    Raises AttributeError when ``unit`` is not a known unit.  Because this
    is a generator, the error surfaces on the first iteration, not at call
    time.
    '''
    if unit not in Sandglass._units:
        raise AttributeError('invalid unit: %r' % (unit,))
    if isinstance(start, basestring):
        start = ben(start)
    if isinstance(end, basestring):
        end = ben(end)

    start = start or ben()
    # Only a missing count means "unbounded"; an explicit 0 must yield
    # nothing rather than being silently promoted to infinity.
    if count is None:
        count = float('inf')
    cur = start
    cnt = 0
    if step > 0:
        end = end or ben(datetime.max)
        while cur < end and cnt < count:
            yield cur
            cur = cur.shifted(**{unit: step})
            cnt += 1
    elif step < 0:
        end = end or ben(datetime.min)
        while cur > end and cnt < count:
            yield cur
            cur = cur.shifted(**{unit: step})
            cnt += 1
Exemple #2
0
def tslice(unit, start=None, end=None, step=1, count=None):
    '''tslice(unit,start=None,end=None,step=1,count=None) -> generator of Sandglass object

    Yield successive time points, moving ``step`` units at a time, in an
    xrange-like fashion.

    unit: one of the names in ``Sandglass._units``; the original notes list
        'year', 'month', 'day', 'hour', 'minute', 'second', 'microsecond'.
    start: first value yielded.  A string is converted with ``ben()``; a
        falsy value means ``ben()`` called without arguments.
    end: exclusive bound.  A string is converted with ``ben()``; a falsy
        value means ``ben(datetime.max)`` for a positive step and
        ``ben(datetime.min)`` for a negative step.
    step: number of units between consecutive values.  A step of 0 yields
        nothing.
    count: maximum number of values to yield; ``None`` means no limit and
        0 yields nothing.

    Raises AttributeError when ``unit`` is not a known unit.  Because this
    is a generator, the error surfaces on the first iteration, not at call
    time.
    '''
    if unit not in Sandglass._units:
        raise AttributeError('invalid unit: %r' % (unit,))
    if isinstance(start, basestring):
        start = ben(start)
    if isinstance(end, basestring):
        end = ben(end)

    start = start or ben()
    # Only a missing count means "unbounded"; an explicit 0 must yield
    # nothing rather than being silently promoted to infinity.
    if count is None:
        count = float('inf')
    cur = start
    cnt = 0
    if step > 0:
        end = end or ben(datetime.max)
        while cur < end and cnt < count:
            yield cur
            cur = cur.shifted(**{unit: step})
            cnt += 1
    elif step < 0:
        end = end or ben(datetime.min)
        while cur > end and cnt < count:
            yield cur
            cur = cur.shifted(**{unit: step})
            cnt += 1
Exemple #3
0
    def __init__(self, expr, base=None):
        '''Parse a five-field schedule expression into per-field value lists.

        expr: whitespace-separated string of exactly five fields.  The third
            field is treated as the day and the fifth as the isoweekday; the
            limits of every field come from ``self.field_range``
            (presumably the usual cron field order -- confirm against the
            class).  Each field is a comma-separated list of items, where an
            item is an integer, ``a-b``, ``a-b/step``, ``*`` or ``*/step``.
        base: value passed to ``ben()`` to obtain the starting time; when
            falsy, ``ben()`` is called without arguments.

        Raises Exception('invalid expr') when the field count is not five or
        a range bound lies outside the field's limits.  A non-integer item
        or step, or a step of zero, surfaces as ValueError from
        ``int()`` / ``range()`` / slicing.
        '''
        self.base = ben(base) if base else ben()
        # Minute is the smallest unit handled: a base carrying seconds or
        # microseconds is shifted one minute and floored to the minute.
        if self.base.second or self.base.microsecond:
            self.base = self.base.shift(minute=1).floor('minute')
        self.cur = self.base.clone()
        self.entry = Entry()
        self.last_fit_year = self.cur.year
        self.range_len = {
            'month': 12,
            'day': self.cur.days_in_month,
            'isoweekday': 7,
            'hour': 24,
            'minute': 60,
        }
        tokens = expr.strip().split()
        if len(tokens) != 5:
            raise Exception('invalid expr')
        self.has_day = (tokens[2] != '*')
        self.has_isoweekday = (tokens[4] != '*')
        for i, tok in enumerate(tokens):
            field = self.field_range[i]
            range_start, range_end = field[1:3]
            # '*' is shorthand for the field's full "low-high" span.
            full_span = '{0[1]}-{0[2]}'.format(field)
            res = []
            for item in tok.split(','):
                mat = re.search(r'^(\d+)-(\d+)(/(.*))?$',
                                item.replace('*', full_span))
                if not mat:
                    # Not a range: the item must be a single integer.
                    res.append(int(item))
                    continue
                start, end, _, step = mat.groups()
                start, end, step = int(start), int(end), int(step or 1)
                if not range_start <= start <= range_end:
                    raise Exception('invalid expr')
                if not range_start <= end <= range_end:
                    raise Exception('invalid expr')
                if start > end:
                    # Wrap-around range (e.g. 22-2 in a field limited to
                    # 0..23): walk from start to the field maximum, then
                    # from the field minimum to end.  list() is required
                    # because Python 3 range objects cannot be concatenated.
                    span = list(range(range_start, range_end + 1))
                    rotated = (span[span.index(start):] +
                               span[:span.index(end) + 1])
                    res.extend(rotated[::step])
                else:
                    res.extend(range(start, end + 1, step))

            self.entry[i] = sorted(res)
        # When the isoweekday field is '*', its entry is emptied; otherwise,
        # when the day field is '*', the day entry is emptied instead.
        if not self.has_isoweekday:
            self.entry.isoweekday = []
        elif not self.has_day:
            self.entry.day = []
Exemple #4
0
def timediff(timestr, factor=86400, base=None):
    '''Return the whole seconds from ``base`` to ``timestr``, modulo ``factor``.

    timestr: value handed to ``ben()`` to build the target time.
    factor: modulus applied to the difference (default 86400).
    base: value handed to ``ben()`` for the reference time; when falsy,
        ``ben()`` is called without arguments.

    The difference in seconds is truncated with ``int()`` before the modulo
    is taken.

    Examples (from the original documentation):
        timediff('20:00:00', factor=86400, base='19:30:00')  ->  1800
        timediff('20:00:00', factor=86400, base='21:30:00')  ->  81000
    '''
    if base:
        origin = ben(base)
    else:
        origin = ben()
    target = ben(timestr)
    elapsed = (target - origin).total_seconds()
    return int(elapsed) % factor
Exemple #5
0
def timediff(timestr, factor=86400, base=None):
    '''Return the whole seconds from ``base`` to ``timestr``, modulo ``factor``.

    timestr: value handed to ``ben()`` to build the target time.
    factor: modulus applied to the difference (default 86400).
    base: value handed to ``ben()`` for the reference time; when falsy,
        ``ben()`` is called without arguments.

    The difference in seconds is truncated with ``int()`` before the modulo
    is taken.

    Examples (from the original documentation):
        timediff('20:00:00', factor=86400, base='19:30:00')  ->  1800
        timediff('20:00:00', factor=86400, base='21:30:00')  ->  81000
    '''
    if base:
        origin = ben(base)
    else:
        origin = ben()
    target = ben(timestr)
    elapsed = (target - origin).total_seconds()
    return int(elapsed) % factor
Exemple #6
0
    def __init__(self, expr, base=None):
        '''Parse a five-field schedule expression into per-field value lists.

        expr: whitespace-separated string of exactly five fields.  The third
            field is treated as the day and the fifth as the isoweekday; the
            limits of every field come from ``self.field_range``
            (presumably the usual cron field order -- confirm against the
            class).  Each field is a comma-separated list of items, where an
            item is an integer, ``a-b``, ``a-b/step``, ``*`` or ``*/step``.
        base: value passed to ``ben()`` to obtain the starting time; when
            falsy, ``ben()`` is called without arguments.

        Raises Exception('invalid expr') when the field count is not five or
        a range bound lies outside the field's limits.  A non-integer item
        or step, or a step of zero, surfaces as ValueError from
        ``int()`` / ``range()`` / slicing.
        '''
        self.base = ben(base) if base else ben()
        # Minute is the smallest unit handled: a base carrying seconds or
        # microseconds is shifted one minute and floored to the minute.
        if self.base.second or self.base.microsecond:
            self.base = self.base.shift(minute=1).floor('minute')
        self.cur = self.base.clone()
        self.entry = Entry()
        self.last_fit_year = self.cur.year
        self.range_len = {
            'month': 12,
            'day': self.cur.days_in_month,
            'isoweekday': 7,
            'hour': 24,
            'minute': 60,
        }
        tokens = expr.strip().split()
        if len(tokens) != 5:
            raise Exception('invalid expr')
        self.has_day = (tokens[2] != '*')
        self.has_isoweekday = (tokens[4] != '*')
        for i, tok in enumerate(tokens):
            field = self.field_range[i]
            range_start, range_end = field[1:3]
            # '*' is shorthand for the field's full "low-high" span.
            full_span = '{0[1]}-{0[2]}'.format(field)
            res = []
            for item in tok.split(','):
                mat = re.search(r'^(\d+)-(\d+)(/(.*))?$',
                                item.replace('*', full_span))
                if not mat:
                    # Not a range: the item must be a single integer.
                    res.append(int(item))
                    continue
                start, end, _, step = mat.groups()
                start, end, step = int(start), int(end), int(step or 1)
                if not range_start <= start <= range_end:
                    raise Exception('invalid expr')
                if not range_start <= end <= range_end:
                    raise Exception('invalid expr')
                if start > end:
                    # Wrap-around range (e.g. 22-2 in a field limited to
                    # 0..23): walk from start to the field maximum, then
                    # from the field minimum to end.  list() is required
                    # because Python 3 range objects cannot be concatenated.
                    span = list(range(range_start, range_end + 1))
                    rotated = (span[span.index(start):] +
                               span[:span.index(end) + 1])
                    res.extend(rotated[::step])
                else:
                    res.extend(range(start, end + 1, step))

            self.entry[i] = sorted(res)
        # When the isoweekday field is '*', its entry is emptied; otherwise,
        # when the day field is '*', the day entry is emptied instead.
        if not self.has_isoweekday:
            self.entry.isoweekday = []
        elif not self.has_day:
            self.entry.day = []
Exemple #7
0
def gen_submission(best_hotels_search_dest, best_hotels_od_ulc,
                   popular_hotel_cluster):
    """Write a submission CSV holding up to five hotel clusters per test row.

    For every row of the test file the clusters are chosen, in order, from:
      1. ``best_hotels_od_ulc`` keyed by (column 6, column 7) of the row,
      2. ``best_hotels_search_dest`` keyed by (column 17, column 20,
         column 21) of the row,
      3. the five highest-scoring entries of ``popular_hotel_cluster``.
    Clusters already chosen for the row are skipped and at most five are
    written.

    best_hotels_search_dest: mapping from key tuple to a {cluster: score}
        mapping.
    best_hotels_od_ulc: mapping from key tuple to a {cluster: score} mapping.
    popular_hotel_cluster: {cluster: score} mapping used as the fallback.

    Cluster keys are concatenated to strings when written, so they must be
    strings.  Reading stops at the first blank line of the test file.
    """
    print('Generate submission...')
    path = ('../../kaggle_data/hotel_recommendation/result/submission_' +
            ben().sql + '.csv')
    test_path = "../../kaggle_data/hotel_recommendation/data/test.csv"
    topclasters = nlargest(5,
                           sorted(popular_hotel_cluster.items()),
                           key=itemgetter(1))

    def fill(ranked, filled):
        # Append not-yet-chosen clusters from ranked until five are held.
        for entry in ranked:
            if len(filled) == 5:
                break
            if entry[0] not in filled:
                filled.append(entry[0])

    # Both files are managed by `with` so they are closed even when a row
    # fails to parse; previously the input file was never closed at all.
    with open(path, "w") as out, open(test_path, "r") as f:
        next(f, None)  # skip the CSV header line
        out.write("id,hotel_cluster\n")

        for total, raw in enumerate(f, 1):
            if total % 1000000 == 0:
                print('Write {} lines...'.format(total))

            line = raw.strip()
            if line == '':
                break

            arr = line.split(",")
            row_id = arr[0]
            user_location_city = arr[6]
            orig_destination_distance = arr[7]
            srch_destination_id = arr[17]
            hotel_country = arr[20]
            hotel_market = arr[21]

            filled = []

            s1 = (user_location_city, orig_destination_distance)
            if s1 in best_hotels_od_ulc:
                d = best_hotels_od_ulc[s1]
                fill(nlargest(5, sorted(d.items()), key=itemgetter(1)),
                     filled)

            s2 = (srch_destination_id, hotel_country, hotel_market)
            if s2 in best_hotels_search_dest:
                d = best_hotels_search_dest[s2]
                # NOTE(review): unlike the other two rankings this one is
                # not pre-sorted, so equal scores keep dict iteration order
                # -- confirm whether that is intended.
                fill(nlargest(5, d.items(), key=itemgetter(1)), filled)

            fill(topclasters, filled)

            # Each cluster is preceded by a single space, as before.
            out.write(str(row_id) + ',' +
                      ''.join(' ' + cluster for cluster in filled) + "\n")
    print('Completed!')
Exemple #8
0
def gen_submission(best_hotels_search_dest, best_hotels_od_ulc, popular_hotel_cluster):
    """Write a submission CSV holding up to five hotel clusters per test row.

    For every row of the test file the clusters are chosen, in order, from:
      1. ``best_hotels_od_ulc`` keyed by (column 6, column 7) of the row,
      2. ``best_hotels_search_dest`` keyed by (column 17, column 20,
         column 21) of the row,
      3. the five highest-scoring entries of ``popular_hotel_cluster``.
    Clusters already chosen for the row are skipped and at most five are
    written.

    best_hotels_search_dest: mapping from key tuple to a {cluster: score}
        mapping.
    best_hotels_od_ulc: mapping from key tuple to a {cluster: score} mapping.
    popular_hotel_cluster: {cluster: score} mapping used as the fallback.

    Cluster keys are concatenated to strings when written, so they must be
    strings.  Reading stops at the first blank line of the test file.
    """
    print('Generate submission...')
    path = '../../kaggle_data/hotel_recommendation/result/submission_' + ben().sql + '.csv'
    test_path = "../../kaggle_data/hotel_recommendation/data/test.csv"
    topclasters = nlargest(5, sorted(popular_hotel_cluster.items()), key=itemgetter(1))

    def fill(ranked, filled):
        # Append not-yet-chosen clusters from ranked until five are held.
        for entry in ranked:
            if len(filled) == 5:
                break
            if entry[0] not in filled:
                filled.append(entry[0])

    # Both files are managed by `with` so they are closed even when a row
    # fails to parse; previously the input file was never closed at all.
    with open(path, "w") as out, open(test_path, "r") as f:
        next(f, None)  # skip the CSV header line
        out.write("id,hotel_cluster\n")

        for total, raw in enumerate(f, 1):
            if total % 1000000 == 0:
                print('Write {} lines...'.format(total))

            line = raw.strip()
            if line == '':
                break

            arr = line.split(",")
            row_id = arr[0]
            user_location_city = arr[6]
            orig_destination_distance = arr[7]
            srch_destination_id = arr[17]
            hotel_country = arr[20]
            hotel_market = arr[21]

            filled = []

            s1 = (user_location_city, orig_destination_distance)
            if s1 in best_hotels_od_ulc:
                d = best_hotels_od_ulc[s1]
                fill(nlargest(5, sorted(d.items()), key=itemgetter(1)), filled)

            s2 = (srch_destination_id, hotel_country, hotel_market)
            if s2 in best_hotels_search_dest:
                d = best_hotels_search_dest[s2]
                # NOTE(review): unlike the other two rankings this one is
                # not pre-sorted, so equal scores keep dict iteration order
                # -- confirm whether that is intended.
                fill(nlargest(5, d.items(), key=itemgetter(1)), filled)

            fill(topclasters, filled)

            # Each cluster is preceded by a single space, as before.
            out.write(str(row_id) + ',' + ''.join(' ' + cluster for cluster in filled) + "\n")
    print('Completed!')