Python get_weblist 예제들, cmlib.get_weblist Python 예제들

예제 #1

0

파일 보기

파일: downloader_class.py 프로젝트: chenm2015/homework-alarm

    def get_update_list(self):
        """Write the list of update files to fetch into ``self.listfile``.

        For every month returned by ``self.get_month_list_dot()`` the remote
        directory listing of collector ``self.co`` is fetched. Each listing
        line containing 'updates' whose file date lies within
        [``self.sdate``, ``self.edate``] is recorded as one line of the form
        ``<web_location><filename>.txt.gz|<size>`` and also logged.

        Returns:
            0, always.
        """
        tmp_dir = self.get_listfile_dir()
        cmlib.make_dir(tmp_dir)

        # The context manager closes (and flushes) the list file even when
        # fetching or parsing a listing raises; it used to stay open forever.
        with open(self.listfile, 'w') as flist:
            for month in self.get_month_list_dot():
                if self.co.startswith('rrc'):
                    web_location = rrc_root + self.co + '/' + month + '/'
                else:
                    web_location = rv_root + self.co + '/bgpdata/' + month + '/UPDATES/'
                    web_location = web_location.replace('//', '/')  # when name is ''

                webraw = cmlib.get_weblist('http://' + web_location)
                cmlib.make_dir(datadir + web_location)

                for line in webraw.split('\n'):
                    # Empty lines cannot contain 'updates', so this single
                    # test also skips them.
                    if 'updates' not in line:
                        continue

                    fields = line.split()
                    fsize = cmlib.parse_size(fields[-1])  # last column: size
                    filename = fields[0]  # omit uninteresting info
                    # third dot-separated field from the end holds the date
                    filedate = int(filename.split('.')[-3])

                    # check whether its date is in our range
                    if filedate < int(self.sdate) or filedate > int(self.edate):
                        continue

                    # note: storing the original .bz2/.gz file name makes
                    # logic clearer
                    record = web_location + filename + '.txt.gz|' + str(fsize)
                    flist.write(record + '\n')
                    logging.info('record file name: ' + record)

        return 0

예제 #2

0

파일 보기

    def get_pfx2as_file(self):
        location = datadir + 'support/' + self.sdate + '/'
        cmlib.make_dir(location)

        tmp = os.listdir(datadir+'support/'+self.sdate+'/')
        for line in tmp:
            if 'pfx2as' in line:
                return 0 # we already have a prefix2as file

        print 'Downloading prefix to AS file ...'
        year, month = self.sdate[:4], self.sdate[4:6] # YYYY, MM
        webloc = 'http://data.caida.org/datasets/routing/routeviews-prefix2as' +\
                '/' + year + '/' + month + '/'

        webraw = cmlib.get_weblist(webloc)
        target_line = ''
        for line in webraw.split('\n'):
            if self.sdate in line:
                target_line = line
                break

        if target_line == '':
            print 'Downloading prefix to AS file fails: no such date!'
            return 0

        fname = target_line.split()[0]
        urllib.urlretrieve(webloc+fname, location+fname)
        subprocess.call('gunzip -c '+location+fname+' > '+\
                location+fname.replace('.gz', ''), shell=True)
        os.remove(location+fname)

        return 0

예제 #3

0

파일 보기

파일: period_class.py 프로젝트: chenm2015/homework-alarm

    def get_pfx2as_file(self):
        location = self.spt_dir
        cmlib.make_dir(location)

        tmp = os.listdir(self.spt_dir)
        for line in tmp:
            if 'pfx2as' in line:
                return 0 # we already have a prefix2as file

        print 'Downloading prefix to AS file ...'
        year, month = self.sdate[:4], self.sdate[4:6] # YYYY, MM
        webloc = 'http://data.caida.org/datasets/routing/routeviews-prefix2as' +\
                '/' + year + '/' + month + '/'

        webraw = cmlib.get_weblist(webloc)
        target_line = ''
        for line in webraw.split('\n'):
            if self.sdate in line:
                target_line = line
                break

        if target_line == '':
            print 'Downloading prefix to AS file fails: no such date!'
            return 0

        fname = target_line.split()[0]
        urllib.urlretrieve(webloc+fname, location+fname)
        subprocess.call('gunzip -c '+location+fname+' > '+\
                location+fname.replace('.gz', ''), shell=True)
        os.remove(location+fname)

        return 0

예제 #4

0

파일 보기

    def get_as2cc_file(self):  # AS to customer cone
        sptfiles = os.listdir(self.spt_dir)
        for line in sptfiles:
            if 'ppdc' in line:
                return 0  # already have a file

        target_line = None
        yearmonth = self.sdate[:6]  # YYYYMM
        print 'Downloading AS to customer cone file ...'
        theurl = 'http://data.caida.org/datasets/2013-asrank-data-supplement/data/'
        webraw = cmlib.get_weblist(theurl)
        for line in webraw.split('\n'):
            if yearmonth in line and 'ppdc' in line:
                target_line = line
                break

        assert target_line != None

        fname = target_line.split()[0]
        cmlib.force_download_file(theurl, self.spt_dir, fname)
        if int(yearmonth) <= 201311:
            # unpack .gz (only before 201311 (include))
            subprocess.call('gunzip ' + self.spt_dir + fname, shell=True)
        else:
            # unpack .bz2 (only after 201406 (include))
            subprocess.call('bunzip2 -d ' + self.spt_dir + fname, shell=True)

        return 0

예제 #5

0

파일 보기

파일: period_class.py 프로젝트: chenm2015/homework-alarm

    def get_as2cc_file(self): # AS to customer cone
        sptfiles = os.listdir(self.spt_dir)
        for line in sptfiles:
            if 'ppdc' in line:
                return 0 # already have a file

        target_line = None
        yearmonth = self.sdate[:6] # YYYYMM
        print 'Downloading AS to customer cone file ...'
        theurl = 'http://data.caida.org/datasets/2013-asrank-data-supplement/data/'
        webraw = cmlib.get_weblist(theurl)
        for line in webraw.split('\n'):
            if yearmonth in line and 'ppdc' in line:
                target_line = line
                break

        assert target_line != None

        fname = target_line.split()[0]
        cmlib.force_download_file(theurl, self.spt_dir, fname)
        if int(yearmonth) <= 201311:
            # unpack .gz (only before 201311 (include))
            subprocess.call('gunzip '+self.spt_dir+fname, shell=True)
        else:
            # unpack .bz2 (only after 201406 (include))
            subprocess.call('bunzip2 -d '+self.spt_dir+fname, shell=True)

        return 0

예제 #6

0

파일 보기

    def get_update_list(self):
        """Record every update file of the configured period in ``self.listfile``.

        The remote listing of collector ``self.co`` is fetched for each month
        from ``self.get_month_list_dot()``. A listing line is kept when it
        contains 'updates' and its file date lies within
        [``self.sdate``, ``self.edate``]; it is written as
        ``<web_location><filename>.txt.gz|<size>`` and logged.

        Returns:
            0, always.
        """
        tmp_dir = self.get_listfile_dir()
        cmlib.make_dir(tmp_dir)

        # "with" guarantees the list file is flushed and closed on every
        # exit path; previously the handle was never closed.
        with open(self.listfile, 'w') as flist:
            for month in self.get_month_list_dot():
                if self.co.startswith('rrc'):
                    web_location = rrc_root + self.co + '/' + month + '/'
                else:
                    web_location = rv_root + self.co + '/bgpdata/' + month + '/UPDATES/'
                    # when name is ''
                    web_location = web_location.replace('//', '/')

                webraw = cmlib.get_weblist('http://' + web_location)
                cmlib.make_dir(datadir + web_location)

                for line in webraw.split('\n'):
                    # An empty line never contains 'updates', so no separate
                    # emptiness test is needed.
                    if 'updates' not in line:
                        continue

                    columns = line.split()
                    fsize = cmlib.parse_size(columns[-1])  # last column: size
                    filename = columns[0]  # omit uninteresting info
                    # third dot-separated field from the end holds the date
                    filedate = int(filename.split('.')[-3])

                    # check whether its date is in our range
                    if filedate < int(self.sdate) or filedate > int(self.edate):
                        continue

                    # note: storing the original .bz2/.gz file name makes
                    # logic clearer
                    entry = web_location + filename + '.txt.gz|' + str(fsize)
                    flist.write(entry + '\n')
                    logging.info('record file name: ' + entry)

        return 0

예제 #7

0

파일 보기

파일: downloader_class.py 프로젝트: chenm2015/homework-alarm

    def download_one_rib(self, my_date):
        tmp_month = my_date[0:4] + '.' + my_date[4:6]
        if self.co.startswith('rrc'):
            web_location = rrc_root + self.co + '/' + tmp_month + '/' 
        else:
            web_location = rv_root + self.co + '/bgpdata/' + tmp_month + '/RIBS/'
            web_location = web_location.replace('//', '/')
        webraw = cmlib.get_weblist('http://' + web_location)

        cmlib.make_dir(datadir+web_location)

        #----------------------------------------------------------------
        # select a RIB file with reasonable (not strange) file size
        rib_list = webraw.split('\n')
        filter(lambda a: a != '', rib_list)
        filter(lambda a: a != '\n', rib_list)
        rib_list = [item for item in rib_list if 'rib' in item or 'bview' in item]

        sizelist = list()
        for line in rib_list:
            size = line.split()[-1]
            fsize = cmlib.parse_size(size)
            sizelist.append(fsize)

        avg = np.mean(sizelist) 

        target_line = None # stores the RIB file for downloading
        largest_line = None
        max = -1
        closest = 99999
        for line in rib_list:
            fdate = line.split()[0].split('.')[-3]
            size = line.split()[-1]
            fsize = cmlib.parse_size(size)
            if fsize > max:
                max = fsize
                largest_line = line
            
            diff = abs(int(fdate)-int(my_date)) # >0
            # XXX logic here not clear (but seems effective)
            if diff <= closest and fsize > 0.9 * avg and fsize < 1.1 * avg:
                target_line = line
                closest = diff

        if target_line is None:
            assert largest_line is not None
            print 'Failed. Resort to downloading the largest RIB...'
            target_line = largest_line # work-around for a special case


        print 'Selected RIB:', target_line
        size = target_line.split()[-1] # claimed RIB file size
        fsize = cmlib.parse_size(size)

        filename = target_line.split()[0]
        full_loc = datadir + web_location + filename # .bz2/.gz

        if os.path.exists(full_loc+'.txt'): # only for clearer logic
            os.remove(full_loc+'.txt')

        #------------------------------------------------------------------
        # Download the RIB
        if os.path.exists(full_loc+'.txt.gz'): 
            print 'existed size & original size:',os.path.getsize(full_loc+'.txt.gz'),fsize
            if os.path.getsize(full_loc+'.txt.gz') > 0.6 * fsize: # 0.6 is good enough
                return full_loc+'.txt.gz' # Do not download
            else:
                os.remove(full_loc+'.txt.gz') # too small to be complete

        if os.path.exists(full_loc): 
            if os.path.getsize(full_loc) <= 0.95 * fsize:
                os.remove(full_loc)
            else: # Good!
                cmlib.parse_mrt(full_loc, full_loc+'.txt', fsize)
                cmlib.pack_gz(full_loc+'.txt')
                return full_loc+'.txt.gz'


        cmlib.force_download_file('http://'+web_location, datadir+web_location, filename)
        cmlib.parse_mrt(full_loc, full_loc+'.txt', fsize)
        cmlib.pack_gz(full_loc+'.txt')
        os.remove(full_loc) # remove the original file

        return full_loc+'.txt.gz'

예제 #8

0

파일 보기

파일: downloader_class.py 프로젝트: chenm2015/homework-alarm

    def download_one_rib_before_unix(self, my_date, unix): # my_date for deciding month
        tmp_month = my_date[0:4] + '.' + my_date[4:6]
        if self.co.startswith('rrc'):
            web_location = rrc_root + self.co + '/' + tmp_month + '/' 
        else:
            web_location = rv_root + self.co + '/bgpdata/' + tmp_month + '/RIBS/'
            web_location = web_location.replace('//', '/')

        try:
            webraw = cmlib.get_weblist('http://' + web_location)
            print 'Getting list from ' + 'http://' + web_location
        except:
            return -1

        cmlib.make_dir(datadir+web_location)

        #----------------------------------------------------------------
        # select a RIB file right before the unix and with reasonable (not strange) file size
        rib_list = webraw.split('\n')
        filter(lambda a: a != '', rib_list)
        filter(lambda a: a != '\n', rib_list)
        rib_list = [item for item in rib_list if 'rib' in item or 'bview' in item]

        sizelist = list()
        for line in rib_list:
            size = line.split()[-1]
            fsize = cmlib.parse_size(size)
            sizelist.append(fsize)

        avg = np.mean(sizelist) 

        ok_rib_list = list() # RIBs whose size is OK
        for line in rib_list:
            fsize = cmlib.parse_size(line.split()[-1])
            if fsize > 0.9 * avg:
                ok_rib_list.append(line)

        target_line = None # the RIB closest to unix 
        min = 9999999999
        for line in ok_rib_list:
            fdate = line.split()[0].split('.')[-3]
            ftime = line.split()[0].split('.')[-2]
            dtstr = fdate+ftime
            objdt = datetime.datetime.strptime(dtstr, '%Y%m%d%H%M') 
            runix = time_lib.mktime(objdt.timetuple()) + 8*60*60 # F**k! Time zone!
            print objdt, runix, unix
            if runix <= unix and unix-runix < min:
                min = unix-runix
                print 'min changed to ', min
                target_line = line

        print 'Selected RIB:', target_line
        if target_line == None:
            return -1
        size = target_line.split()[-1] # claimed RIB file size
        fsize = cmlib.parse_size(size)

        filename = target_line.split()[0]
        full_loc = datadir + web_location + filename # .bz2/.gz

        if os.path.exists(full_loc+'.txt'): # only for clearer logic
            os.remove(full_loc+'.txt')

        #------------------------------------------------------------------
        # Download the RIB
        if os.path.exists(full_loc+'.txt.gz'): 
            print 'existed!!!!!!!!!!!!'
            return full_loc+'.txt.gz' # Do not download

        if os.path.exists(full_loc): 
            cmlib.parse_mrt(full_loc, full_loc+'.txt', fsize)
            cmlib.pack_gz(full_loc+'.txt')
            return full_loc+'.txt.gz'


        cmlib.force_download_file('http://'+web_location, datadir+web_location, filename)
        cmlib.parse_mrt(full_loc, full_loc+'.txt', fsize)
        cmlib.pack_gz(full_loc+'.txt')
        os.remove(full_loc) # remove the original file

        return full_loc+'.txt.gz'

예제 #9

0

파일 보기

def get_file():
    for clctr in collectors:
        cl_name = clctr
        hdname_detail = hdname + 'archive.routeviews.org/' + cl_name +\
            '/bgpdata/'
        hdname_detail = hdname_detail.replace('//', '/') # happens when cl = ''
        # only for downloading updates, not RIBs
        for ym in yearmonth:
            sdate = ym.split('.')[0] + ym.split('.')[1] + '01'
            edate = ym.split('.')[0] + ym.split('.')[1] + '07'
            filelocation = ''
            filelocation = 'archive.routeviews.org/' + cl_name + '/bgpdata/' + ym + '/UPDATES/'
            filelocation = filelocation.replace('//', '/')  # when name is ''
            webraw = cmlib.get_weblist('http://' + filelocation)
            print filelocation
            cmlib.make_dir(hdname+'metadata/'+ym)
            flist = open(hdname+'metadata/'+ym+'/updt_filelist_'+cl_name, 'w')
            cmlib.make_dir(hdname+filelocation)
            for line in webraw.split('\n'):

                if not 'updates' in line or line == '' or line == '\n':
                    continue

                size = line.split()[-1]
                if size.isdigit():
                    fsize = float(size)
                else:
                    fsize = float(size[:-1]) * cmlib.size_u2v(size[-1])
                filename = line.split()[0]  # omit uninteresting info
                filedate = filename.split('.')[-3]

                # check whether its datetime in our range
                if int(filedate) < int(sdate) or int(filedate) > int(edate):
                    continue

                print filename

                origin_floc = hdname + filelocation + filename # original file loc&name
                flist.write(origin_floc+'.txt.gz\n')  # .xx.txt.gz file list

                # remove existing xx.txt file to make things clearer
                try:
                    os.remove(origin_floc+'.txt')
                except:
                    pass

                if os.path.exists(origin_floc+'.txt.gz'):
                    if os.path.getsize(origin_floc+'.txt.gz') > 0.1 * fsize:
                        if os.path.exists(origin_floc):  # .bz2/.gz useless anymore
                            os.remove(origin_floc)
                        continue
                    else:
                        os.remove(origin_floc+'.txt.gz')

                if os.path.exists(origin_floc):
                    if os.path.getsize(origin_floc) > 0.9 * fsize:
                        continue
                    else:
                        os.remove(origin_floc)


                cmlib.force_download_file('http://'+filelocation, hdname+filelocation, filename) 

            # file that stores update list
            flist.close()

            filelocation = 'archive.routeviews.org/' + cl_name + '/bgpdata/' + ym + '/RIBS/'
            filelocation = filelocation.replace('//', '/')  # when name is ''
            webraw = cmlib.get_weblist('http://' + filelocation)
            print filelocation
            cmlib.make_dir(hdname+filelocation)

            # for each event, we only download one RIB (on the sdate)
            rib_fname = ''
            for line in webraw.split('\n'):
                
                if not 'rib' in line and not 'bview' in line:
                    continue
                if line == '' or line == '\n':
                    continue

                size = line.split()[-1]
                if size.isdigit():
                    fsize = float(size)
                else:
                    fsize = float(size[:-1]) * cmlib.size_u2v(size[-1])

                filename = line.split()[0]
                print filename
                if not int(filename.split('.')[-3]) == int(sdate):
                    continue
                print filename
                origin_floc = hdname + filelocation + filename # original file loc&name

                try:
                    os.remove(origin_floc+'.txt')
                except:
                    pass

                rib_fname = filelocation + filename
                if os.path.exists(origin_floc+'.txt.gz'): 
                    if os.path.getsize(origin_floc+'.txt.gz') > 0.1 * fsize:
                        if os.path.exists(origin_floc):  # .bz2/.gz useless anymore
                            os.remove(origin_floc)
                        break
                    else:
                        os.remove(origin_floc+'.txt.gz')

                if os.path.exists(origin_floc): 
                    if os.path.getsize(origin_floc) > 0.9 * fsize:
                        break
                    else:
                        os.remove(origin_floc)

                cmlib.force_download_file('http://'+filelocation, hdname+filelocation, filename)
                break


            # download one rib to intial as_path
            sdate_datetime = datetime.datetime(int(sdate[0:4]), int(sdate[4:6]),int(sdate[6:8]))
            as_path_date = sdate_datetime - datetime.timedelta(days=1)
            as_path_date = as_path_date.strftime('%Y%m%d')
            
            as_path_ym = as_path_date[0:4] + '.' + as_path_date[4:6]
            filelocation = 'archive.routeviews.org/' + cl_name + '/bgpdata/' + as_path_ym + '/RIBS/'
            filelocation = filelocation.replace('//', '/')  # when name is ''
            webraw = cmlib.get_weblist('http://' + filelocation)
            print filelocation
            cmlib.make_dir(hdname+filelocation)

            asrib_fname = ''
            for line in reversed(webraw.split('\n')):
                print line
                if not 'rib' in line and not 'bview' in line:
                    continue
                if line == '' or line == '\n':
                    continue

                size = line.split()[-1]
                if size.isdigit():
                    fsize = float(size)
                else:
                    fsize = float(size[:-1]) * cmlib.size_u2v(size[-1])

                filename = line.split()[0]
                print filename
                if not int(filename.split('.')[-3]) == int(as_path_date):
                    continue
                print filename
                origin_floc = hdname + filelocation + filename # original file loc&name

                try:
                    os.remove(origin_floc+'.txt')
                except:
                    pass

                asrib_fname = filelocation + filename
                if os.path.exists(origin_floc+'.txt.gz'): 
                    if os.path.getsize(origin_floc+'.txt.gz') > 0.1 * fsize:
                        if os.path.exists(origin_floc):  # .bz2/.gz useless anymore
                            os.remove(origin_floc)
                        break
                    else:
                        os.remove(origin_floc+'.txt.gz')

                if os.path.exists(origin_floc): 
                    if os.path.getsize(origin_floc) > 0.9 * fsize:
                        break
                    else:
                        os.remove(origin_floc)

                cmlib.force_download_file('http://'+filelocation, hdname+filelocation, filename)
                break
            ## now for update and RIB files, their formats are either .bz2/gz or
            ## .xx.txt.gz!!!

            print 'parsing updates...'
            parse_updates(ym, cl_name)

            print 'parsing RIB and getting peers...'
            rib_location = hdname + rib_fname  # .bz2/.gz
            #print rib_location,'dd'
            peers = get_peers(clctr,ym,rib_location)
            print 'peers: ', peers
            
            as_path_rib_location = hdname + asrib_fname  # .bz2/.gz            
            process_as_path_rib(clctr,as_path_ym,as_path_rib_location)

            print 'determining table transfers start and end time for each peer...'
            for peer in peers:  # must process each peer one by one
                peer = peer.rstrip()
                print 'processing ',peer,'...'
                subprocess.call('perl '+homedir+'tool/bgpmct.pl -rf '+rib_location+'.txt.gz'+' -ul '+\
                        hdname+'metadata/'+ym+'/updt_filelist_'+cl_name+' -p '+peer+' > '+\
                        hdname+'tmp/'+peer+'_result.txt', shell=True)
                    
            print 'delete updates caused by session reset for each peer...'
            for peer in peers:
                # No reset from this peer, so nothing in the file
                try:
                    if os.path.getsize(hdname+'tmp/'+peer+'_result.txt') == 0:
                        continue
                except: # cannot find file
                    continue
                print '\nculprit now: ', peer
                del_tabletran_updates(peer, ym, cl_name)

            # delete all rubbish in the end
            subprocess.call('rm '+hdname+'tmp/*', shell=True)
                                
    return

예제 #10

0

파일 보기

    def download_one_rib(self, my_date):
        tmp_month = my_date[0:4] + '.' + my_date[4:6]
        if self.co.startswith('rrc'):
            web_location = rrc_root + self.co + '/' + tmp_month + '/'
        else:
            web_location = rv_root + self.co + '/bgpdata/' + tmp_month + '/RIBS/'
            web_location = web_location.replace('//', '/')
        webraw = cmlib.get_weblist('http://' + web_location)

        cmlib.make_dir(datadir + web_location)

        #----------------------------------------------------------------
        # select a RIB file with reasonable (not strange) file size
        rib_list = webraw.split('\n')
        filter(lambda a: a != '', rib_list)
        filter(lambda a: a != '\n', rib_list)
        rib_list = [
            item for item in rib_list if 'rib' in item or 'bview' in item
        ]

        sizelist = list()
        for line in rib_list:
            size = line.split()[-1]
            fsize = cmlib.parse_size(size)
            sizelist.append(fsize)

        avg = np.mean(sizelist)

        target_line = None  # stores the RIB file for downloading
        largest_line = None
        max = -1
        closest = 99999
        for line in rib_list:
            fdate = line.split()[0].split('.')[-3]
            size = line.split()[-1]
            fsize = cmlib.parse_size(size)
            if fsize > max:
                max = fsize
                largest_line = line

            diff = abs(int(fdate) - int(my_date))  # >0
            # XXX logic here not clear (but seems effective)
            if diff <= closest and fsize > 0.9 * avg and fsize < 1.1 * avg:
                target_line = line
                closest = diff

        if target_line is None:
            assert largest_line is not None
            print 'Failed. Resort to downloading the largest RIB...'
            target_line = largest_line  # work-around for a special case

        print 'Selected RIB:', target_line
        size = target_line.split()[-1]  # claimed RIB file size
        fsize = cmlib.parse_size(size)

        filename = target_line.split()[0]
        full_loc = datadir + web_location + filename  # .bz2/.gz

        if os.path.exists(full_loc + '.txt'):  # only for clearer logic
            os.remove(full_loc + '.txt')

        #------------------------------------------------------------------
        # Download the RIB
        if os.path.exists(full_loc + '.txt.gz'):
            print 'existed size & original size:', os.path.getsize(
                full_loc + '.txt.gz'), fsize
            if os.path.getsize(full_loc +
                               '.txt.gz') > 0.6 * fsize:  # 0.6 is good enough
                return full_loc + '.txt.gz'  # Do not download
            else:
                os.remove(full_loc + '.txt.gz')  # too small to be complete

        if os.path.exists(full_loc):
            if os.path.getsize(full_loc) <= 0.95 * fsize:
                os.remove(full_loc)
            else:  # Good!
                cmlib.parse_mrt(full_loc, full_loc + '.txt', fsize)
                cmlib.pack_gz(full_loc + '.txt')
                return full_loc + '.txt.gz'

        cmlib.force_download_file('http://' + web_location,
                                  datadir + web_location, filename)
        cmlib.parse_mrt(full_loc, full_loc + '.txt', fsize)
        cmlib.pack_gz(full_loc + '.txt')
        os.remove(full_loc)  # remove the original file

        return full_loc + '.txt.gz'

예제 #11

0

파일 보기

    def download_one_rib_before_unix(self, my_date,
                                     unix):  # my_date for deciding month
        tmp_month = my_date[0:4] + '.' + my_date[4:6]
        if self.co.startswith('rrc'):
            web_location = rrc_root + self.co + '/' + tmp_month + '/'
        else:
            web_location = rv_root + self.co + '/bgpdata/' + tmp_month + '/RIBS/'
            web_location = web_location.replace('//', '/')

        try:
            webraw = cmlib.get_weblist('http://' + web_location)
            print 'Getting list from ' + 'http://' + web_location
        except:
            return -1

        cmlib.make_dir(datadir + web_location)

        #----------------------------------------------------------------
        # select a RIB file right before the unix and with reasonable (not strange) file size
        rib_list = webraw.split('\n')
        filter(lambda a: a != '', rib_list)
        filter(lambda a: a != '\n', rib_list)
        rib_list = [
            item for item in rib_list if 'rib' in item or 'bview' in item
        ]

        sizelist = list()
        for line in rib_list:
            size = line.split()[-1]
            fsize = cmlib.parse_size(size)
            sizelist.append(fsize)

        avg = np.mean(sizelist)

        ok_rib_list = list()  # RIBs whose size is OK
        for line in rib_list:
            fsize = cmlib.parse_size(line.split()[-1])
            if fsize > 0.9 * avg:
                ok_rib_list.append(line)

        target_line = None  # the RIB closest to unix
        min = 9999999999
        for line in ok_rib_list:
            fdate = line.split()[0].split('.')[-3]
            ftime = line.split()[0].split('.')[-2]
            dtstr = fdate + ftime
            objdt = datetime.datetime.strptime(dtstr, '%Y%m%d%H%M')
            runix = time_lib.mktime(
                objdt.timetuple()) + 8 * 60 * 60  # F**k! Time zone!
            print objdt, runix, unix
            if runix <= unix and unix - runix < min:
                min = unix - runix
                print 'min changed to ', min
                target_line = line

        print 'Selected RIB:', target_line
        if target_line == None:
            return -1
        size = target_line.split()[-1]  # claimed RIB file size
        fsize = cmlib.parse_size(size)

        filename = target_line.split()[0]
        full_loc = datadir + web_location + filename  # .bz2/.gz

        if os.path.exists(full_loc + '.txt'):  # only for clearer logic
            os.remove(full_loc + '.txt')

        #------------------------------------------------------------------
        # Download the RIB
        if os.path.exists(full_loc + '.txt.gz'):
            print 'existed!!!!!!!!!!!!'
            return full_loc + '.txt.gz'  # Do not download

        if os.path.exists(full_loc):
            cmlib.parse_mrt(full_loc, full_loc + '.txt', fsize)
            cmlib.pack_gz(full_loc + '.txt')
            return full_loc + '.txt.gz'

        cmlib.force_download_file('http://' + web_location,
                                  datadir + web_location, filename)
        cmlib.parse_mrt(full_loc, full_loc + '.txt', fsize)
        cmlib.pack_gz(full_loc + '.txt')
        os.remove(full_loc)  # remove the original file

        return full_loc + '.txt.gz'