Example #1
def statuses_to_csv():
    if not os.path.exists(status_csv):  # write the CSVs only when they do not exist yet
        with open(status_csv, 'w') as sout, open(status2_csv, 'w') as sout2:
            sout.write('link,domain,suffix,idx,status\n')
            sout2.write('link,domain,suffix,idx,status\n')
            statuses = read_pickle(wsmall_latest_nwl_status)
            c = 0
            for link, status in statuses.items():
                tld = tldextract.extract(link)
                if status == -1:
                    sout2.write('%s,%s,%s,%d,%s\n' %
                                (link, tld.domain, tld.suffix, c, 'timedout'))
                elif status == -2:
                    sout2.write('%s,%s,%s,%d,%s\n' %
                                (link, tld.domain, tld.suffix, c, 'ip'))
                else:
                    sout.write('%s,%s,%s,%d,%d\n' %
                               (link, tld.domain, tld.suffix, c, status))
                c += 1
    statuses = read_pickle(wsmall_latest_nwl_status)
    with open('output_files/status_count.table', 'w') as out:
        c = Counter()
        for link, status in statuses.items():
            if status == -1:
                c['timedout'] += 1
            elif status == -2:
                c['ip'] += 1
            else:
                c[status] += 1
        headers = ['status', 'count']
        data = []

        for k, v in c.items():
            data.append([k, v])
        out.write(tabulate(data, headers=headers, tablefmt='latex'))
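All of the examples in this listing lean on a small pickle helper pair: read_pickle/dump_pickle in the data-processing snippets, util.read_pickle/util.save_pickle in the Freevo snippets. The real implementations vary per project; what follows is only a minimal sketch of the contract the callers rely on, in particular returning None for a missing or unreadable cache file. Keep in mind that unpickling executes code from the file, which is acceptable here only because every cache is written locally by the same program.

import os
import pickle

def read_pickle(path):
    # Return the object stored at path, or None when the file is
    # missing or unreadable (several callers rely on the None case).
    if not os.path.isfile(path):
        return None
    try:
        with open(path, 'rb') as fh:
            return pickle.load(fh)
    except (pickle.UnpicklingError, EOFError, OSError):
        return None

def dump_pickle(obj, path):
    # Write obj to path; the Freevo snippets call this util.save_pickle.
    with open(path, 'wb') as fh:
        pickle.dump(obj, fh, pickle.HIGHEST_PROTOCOL)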
Example #2
    def loadFromCache(self):
        """ load the data and the map from the cache """
        _debug_('loadFromCache()', 2)
        self.elocationData = util.read_pickle(self.cacheElocation)
        self.currentData = util.read_pickle(self.cacheCurrent)
        self.forecastData = util.read_pickle(self.cacheForecast)

        try:
            size = int(os.stat(self.mapFile)[6])
        except Exception, error:
            _debug_('failed loading weather map for "%s" from cache: %s' % (self.location, error), DWARNING)
Example #3
    def loadFromCache(self):
        """ load the data and the map from the cache """
        logger.log( 9, 'loadFromCache()')
        self.elocationData = util.read_pickle(self.cacheElocation)
        self.currentData = util.read_pickle(self.cacheCurrent)
        self.forecastData = util.read_pickle(self.cacheForecast)

        try:
            size = int(os.stat(self.mapFile)[6])
        except Exception, why:
            logger.warning('failed loading weather map for "%s" from cache: %s', self.location, why)
Example #4
def build_word_count():
    if os.path.isfile('pickled/wcount.pickle'):
        return read_pickle('pickled/wcount.pickle')
    wcount = Counter()
    # the same counting pass, applied to each NLTK corpus in turn
    for corpus in (words, gutenberg, brown, reuters, inaugural):
        for fid in corpus.fileids():
            for word in corpus.words(fid):
                word = word.lower()
                if only_words.match(word) is not None:
                    wcount[word] += 1
    dump_pickle(wcount, 'pickled/wcount.pickle')
    return wcount
Example #5
    def load_cache(self, dirname):
        """
        load a new cachefile
        """
        if dirname == self.current_cachedir:
            return

        if self.cache_modified:
            self.save_cache()

        cachefile = self.__get_filename__(dirname)
        logger.log( 9, 'load cache %s', cachefile)

        if config.MEDIAINFO_USE_MEMORY and self.all_directories.has_key(cachefile):
            self.current_objects = self.all_directories[cachefile]
        else:
            if os.path.isfile(cachefile):
                self.current_objects = util.read_pickle(cachefile)
                # maybe the cache file is broken and read_pickle returns None
                if not self.current_objects:
                    self.current_objects = {}
            else:
                self.current_objects = {}
            if config.MEDIAINFO_USE_MEMORY:
                self.all_directories[cachefile] = self.current_objects

        self.current_cachefile = cachefile
        self.current_cachedir  = dirname
        self.cache_modified    = False
Example #6
def getMameRomList():
    file_ver = None
    mameRomList = None

    if os.path.isfile(config.GAMES_MAME_CACHE):
        mameRomList = util.read_pickle(config.GAMES_MAME_CACHE)

        try:
            file_ver = mameRomList.TYPES_VERSION
        except AttributeError:
            print 'The cache does not have a version and must be recreated.'

        if file_ver != mame_types.TYPES_VERSION:
            print(
                ('MameRomList version number %s is stale (new is %s), must ' +
                 'be reloaded') % (file_ver, mame_types.TYPES_VERSION))
        else:
            if DEBUG:
                print 'Got MameRomList (version %s).' % file_ver

    if mameRomList is None:
        mameRomList = mame_types.MameRomList()

    print "MameRomList has %s items." % len(mameRomList.getMameRoms())
    return mameRomList
Example #7
def disc_info(media, force=False):
    """
    return kaa metadata disc information for the media
    """
    discinfo = mmpython.parse(media.devicename)
    if not discinfo or not discinfo.id:
        # bad disc, e.g. blank disc
        return {}

    cachedir  = os.path.join(config.OVERLAY_DIR, 'disc/metadata')
    cachefile = os.path.join(cachedir, discinfo.id + '.freevo')

    if os.path.isfile(cachefile):
        metainfo = util.read_pickle(cachefile)
    else:
        metainfo = {}

    if force or discinfo.mime == 'unknown/unknown' and not metainfo.has_key('disc_num_video'):
        media.mount()
        for type in ('video', 'audio', 'image'):
            items = getattr(config, '%s_SUFFIX' % type.upper())
            files = util.match_files_recursively(media.mountdir, items)
            metainfo['disc_num_%s' % type] = len(files)
        media.umount()
        util.save_pickle(metainfo, cachefile)

    info = Info(cachefile, discinfo, metainfo)
    info.disc = True
    return info
Example #8
def load_test_data(test_file, max_length=100, vocabulary=None, config=None):
    """
    Loads and preprocesses data for the MR dataset.
    Returns padded input vectors, the raw lines, the labels, and the numeric label array.
    """
    contents = util.read_txt(test_file)
    lines = [line for line in contents]
    labels = []
    x_text = []
    y = None
    if config is None:
        x_text = [s.split()[:max_length] for s in lines]
    else:
        y_text = []
        label_dict = util.read_txt_to_dict(config)
        for line in lines:
            line = line.split(' <> ')
            x_text.append(line[1].split()[:max_length])
            labels.append(line[0])
            label_num = label_dict[line[0].strip()]
            y_text.append(label_num)
        y = np.array(y_text)

    sentences_padded = pad_sentences(x_text, max_length)
    vocabulary = util.read_pickle(vocabulary)
    x = np.array([[vocabulary.get(word, 0) for word in sentence] for sentence in sentences_padded])

    return x, contents, labels, y
Example #9
def build_stem_queries():
    stemmer_idx = read_pickle('pickled/stemmerIdx3.pickle')
    for stemmer, stemdic in stemmer_idx.items():
        print('building queries for %s' % stemmer)
        queries = []
        c = 0
        for stem in stemdic.values():
            if len(stem.stemsTo) > 1:
                c += 3
                for c1, c2 in combinations(stem.stemsTo, 2):
                    if c1 is None or c2 is None:
                        raise Exception('bad field')
                    queries.append({
                        'number': '%s,%s' % (stem.stem, c1),
                        'text': '#combine( #dirichlet( #extents:@/%s/:part=postings() ) )' % c1
                    })
                    queries.append({
                        'number': '%s,%s' % (stem.stem, c2),
                        'text': '#combine( #dirichlet( #extents:@/%s/:part=postings() ) )' % c2
                    })
                    # #max( #extents:@/replicate/:part=postings() #extents:@/replicating/:part=postings())
                    queries.append({
                        'number': '%s,%s,%s' % (stem.stem, c1, c2),
                        'text': '#combine( #uw:50( #extents:@/%s/:part=postings() #extents:@/%s/:part=postings() ) )' % (c1, c2)
                    })
            else:
                c += 1
                queries.append({
                    'number': '%s,%s' % (stem.stem, stem.stemsTo[0]),
                    'text': '#combine( #dirichlet( #extents:@/%s/:part=postings() ) )' % stem.stemsTo[0]
                })
        qloc = 'galagoqueries/window50/%s.json' % stemmer
        with open(qloc, 'w') as qout:
            json.dump({
                'queries': queries,
                'index': 'index3',
                'queryType': 'complex'
            }, qout, indent=2)
        print('executing queries for %s' % stemmer)
        cline = './rungalago.sh threaded-batch-search %s' % qloc
        with open('output_files/window50/%s_query_ret.trec' % stemmer,
                  'w') as retOut:
            runner = Popen(shlex.split(cline), stdout=retOut, stderr=PIPE)
            print(runner.stderr.read())
Example #10
def association_measures():
    ab_count, ab_count_wins, wins = read_pickle('pickled/window5Counts.pickle')
    rets = []
    for (a, b) in ab_count_wins.keys():
        rets.append(Win5Ret(a, b, ab_count_wins, ab_count, wins))
    with open('output_files/window5/ordered5associationret.csv', 'w') as out:
        for ret in sorted(rets, key=lambda w5r: w5r.dice, reverse=True):
            ret.write_csv(out)
Example #11
def get_link_df():
    if not os.path.exists(wsmall_latest_linkdf):
        names = ['wsmall_file', 'href']
        ldf = pd.read_csv(wsmall_latest_links, sep=',', names=names)
        dump_pickle(ldf, wsmall_latest_linkdf)
        return ldf
    else:
        return read_pickle(wsmall_latest_linkdf)
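Most of the module-level functions in this listing repeat the build-or-load idiom shown in get_link_df above: return the pickle if it exists, otherwise compute the value, dump it, and return it. As a sketch only, the idiom factors into a decorator; pickle_cached is a hypothetical name, not part of any of these codebases.

import os
import pickle
from functools import wraps

def pickle_cached(path):
    # Decorator: memoize the wrapped function's result on disk at path.
    def decorator(fn):
        @wraps(fn)
        def wrapper(*args, **kwargs):
            if os.path.exists(path):
                with open(path, 'rb') as fh:
                    return pickle.load(fh)
            result = fn(*args, **kwargs)
            with open(path, 'wb') as fh:
                pickle.dump(result, fh, pickle.HIGHEST_PROTOCOL)
            return result
        return wrapper
    return decorator

# With the helper, get_link_df above would reduce to:
# @pickle_cached(wsmall_latest_linkdf)
# def get_link_df():
#     return pd.read_csv(wsmall_latest_links, sep=',', names=['wsmall_file', 'href'])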
Example #12
def delete_old_files_2():
    """
    delete cachfiles/entries for files which don't exists anymore
    """
    print checking('deleting old web-server thumbnails'),
    sys.__stdout__.flush()
    num = 0
    for file in util.match_files_recursively(vfs.www_image_cachedir(),
                                             config.IMAGE_SUFFIX):
        if not vfs.isfile(
                file[len(vfs.www_image_cachedir()):file.rindex('.')]):
            os.unlink(file)
            num += 1
    print 'deleted %s file%s' % (num, num != 1 and 's' or '')

    print checking('deleting old cache files'),
    sys.__stdout__.flush()
    num = 0
    for file in util.match_files_recursively(config.OVERLAY_DIR, ['raw']):
        if file.startswith(os.path.join(config.OVERLAY_DIR, 'disc')):
            continue
        if not vfs.isfile(file[len(config.OVERLAY_DIR):-4]):
            os.unlink(file)
            num += 1
    print 'deleted %s file%s' % (num, num != 1 and 's' or '')

    print checking('deleting cache for directories not existing anymore'),
    subdirs = util.get_subdirs_recursively(config.OVERLAY_DIR)
    subdirs.reverse()
    for file in subdirs:
        if not os.path.isdir(file[len(config.OVERLAY_DIR):]) and not \
                file.startswith(os.path.join(config.OVERLAY_DIR, 'disc')):
            for metafile in ('cover.png', 'cover.png.raw', 'cover.jpg',
                             'cover.jpg.raw', 'mmpython.cache',
                             'freevo.cache'):
                if os.path.isfile(os.path.join(file, metafile)):
                    os.unlink(os.path.join(file, metafile))
            if not os.listdir(file):
                os.rmdir(file)
    print 'done'

    print checking('deleting old entries in meta-info'),
    sys.__stdout__.flush()
    for filename in util.recursefolders(config.OVERLAY_DIR, 1, 'freevo.cache',
                                        1):
        if filename.startswith(os.path.join(config.OVERLAY_DIR, 'disc')):
            continue
        sinfo = os.stat(filename)
        if not sinfo[ST_SIZE]:
            #print '%s is empty' % filename
            continue
        dirname = os.path.dirname(filename)[len(config.OVERLAY_DIR):]
        data = util.read_pickle(filename)
        for key in copy.copy(data):
            if not os.path.exists(os.path.join(dirname, str(key))):
                del data[key]
        util.save_pickle(data, filename)
    print 'done'
Example #13
def disc_info(media, force=False):
    """
    return kaa metadata disc information for the media
    """
    type, id = mmpython.cdrom.status(media.devicename)
    if not id:
        # bad disc, e.g. blank disc
        return {}

    cachedir = os.path.join(config.OVERLAY_DIR, 'disc/metadata')
    cachefile = os.path.join(cachedir, id + '.mmpython')

    if os.path.isfile(cachefile) and not force:
        mmdata = util.read_pickle(cachefile)
    else:
        mmdata = mmpython.parse(media.devicename)
        if not mmdata:
            print '*****************************************'
            print 'Error detecting the disc in %r' % (media.devicename)
            print 'Please contact the developers'
            print '*****************************************'
            return {}
        else:
            util.save_pickle(mmdata, cachefile)

    cachefile = os.path.join(cachedir, id + '.freevo')

    if os.path.isfile(cachefile):
        metainfo = util.read_pickle(cachefile)
    else:
        metainfo = {}

    if mmdata.mime == 'unknown/unknown' and not metainfo.has_key(
            'disc_num_video'):
        media.mount()
        for type in ('video', 'audio', 'image'):
            items = getattr(config, '%s_SUFFIX' % type.upper())
            files = util.match_files_recursively(media.mountdir, items)
            metainfo['disc_num_%s' % type] = len(files)
        media.umount()
        util.save_pickle(metainfo, cachefile)

    info = Info(cachefile, mmdata, metainfo)
    info.disc = True
    return info
Example #14
    def __init__(self):
        """
        init the skin engine
        """
        global skin_engine
        skin_engine = self

        self.display_style = { 'menu' : 0 }
        self.force_redraw  = True
        self.last_draw     = None, None, None
        self.screen        = screen.get_singleton()
        self.areas         = {}
        self.suspended     = False
        self.transitioning = False
        self.current_screen= None
        self.next_screen   = None
        self.timer         = None

        # load default areas
        from listing_area   import Listing_Area
        from tvlisting_area import TVListing_Area
        from view_area      import View_Area
        from info_area      import Info_Area
        from default_areas  import Screen_Area, Title_Area, Subtitle_Area, Plugin_Area
        from scrollabletext_area import Scrollabletext_Area
        from textentry_area import Textentry_Area
        from buttongroup_area import Buttongroup_Area

        for a in ('screen', 'title', 'subtitle', 'view', 'listing', 'info',
                'plugin', 'scrollabletext', 'textentry',  'buttongroup'):
            self.areas[a] = eval('%s_Area()' % a.capitalize())
        self.areas['tvlisting'] = TVListing_Area()

        self.storage_file = os.path.join(config.FREEVO_CACHEDIR, 'skin-%s' % os.getuid())
        self.storage = util.read_pickle(self.storage_file)
        if self.storage:
            if not config.SKIN_XML_FILE:
                config.SKIN_XML_FILE = self.storage['SKIN_XML_FILE']
            else:
                logger.log( 9, 'skin forced to %s', config.SKIN_XML_FILE)
        else:
            if not config.SKIN_XML_FILE:
                config.SKIN_XML_FILE = config.SKIN_DEFAULT_XML_FILE
            self.storage = {}

        # load the fxd file
        self.settings = xml_skin.XMLSkin()
        self.set_base_fxd(config.SKIN_XML_FILE)

        if config.SKIN_SCREEN_TRANSITION == 'blend':
            self.do_transition = self.do_blend_transition
        elif config.SKIN_SCREEN_TRANSITION == 'slide':
            self.do_transition = self.do_slide_transition
        else:
            print 'WARNING: Unknown screen transition, disabling transitions'
            config.SKIN_USE_SCREEN_TRANSITIONS = False
            config.SKIN_USE_PAGE_TRANSITIONS = False
Example #15
def check_webarchive(ldf):
    if not os.path.exists(wsmall_latest_webarchive):  # build and cache only when the pickle is missing
        archive_sites = []
        for archive_site in archiving_sites:
            archive_sites.append(ldf[ldf.href.str.startswith(archive_site)])
        about_wba = pd.concat(archive_sites)
        dump_pickle(about_wba, wsmall_latest_webarchive)
        return about_wba
    else:
        return read_pickle(wsmall_latest_webarchive)
Example #16
def get_users2(ureviews, mset):
    if not os.path.exists(usr_pickle2):
        def user_trans(lines):
            return map(lambda line: UserWReviews(split=usr_split.split(line), ureviews=ureviews, mset=mset), lines)

        with SelectFromFile(user_file, transformer=user_trans, selector=lambda x: list(x)) as it:
            usrs = it
        dump_pickle(usrs, usr_pickle2)
    else:
        usrs = read_pickle(usr_pickle2)
    return usrs
Example #17
    def getsiteheadlines(self):
        headlines = []
        pfile = os.path.join(self.cachedir, 'headlines-%i' % self.location_index)
        if not os.path.isfile(pfile) or \
                abs(time.time() - os.path.getmtime(pfile)) > MAX_HEADLINE_AGE:
            # fresh headlines: cache is missing or older than MAX_HEADLINE_AGE
            headlines = self.fetchheadlinesfromurl()
        else:
            # cached headlines are still fresh
            headlines = util.read_pickle(pfile)
        return headlines
Example #18
    def loadFromCache(self):
        self.weatherData = util.read_pickle(self.cacheFile)

        try:
            size = int(os.stat(self.mapFile)[6])
        except:
            _debug_("failed attempting to load %s radar map from cache" % (self.location,), DERROR)
        else:
            imgfd = os.open(self.mapFile, os.O_RDONLY)
            self.weatherMapData = os.read(imgfd, size)
            os.close(imgfd)
Example #19
def coverageReport():
    stemmer_idx = read_pickle('pickled/stemmerIdx3.pickle')
    for stemmer in [
            'Lancaster', 'WordNetLemmatizer', 'PorterStemmer',
            'SnowballStemmer'
    ]:
        with open('output_files/window50/%s_dice_connected.txt' % stemmer, 'r') as sin, \
                open('output_files/window50/%s_dice_sconnected.txt' % stemmer, 'r') as sin2:
            print('generating coverage report for %s' % stemmer)
            stemdic = stemmer_idx[stemmer]
            writeCoverage(stemmer, stemdic, 'connected', sin)
            writeCoverage(stemmer, stemdic, 'sconnected', sin2)
Example #20
def get_users(reviews=None):
    if not os.path.exists(usr_pickle):
        def user_trans(lines):
            return map(lambda line: User(usr_split.split(line), reviews), lines)

        usrs = {}
        with SelectFromFile(user_file, transformer=user_trans, selector=lambda x: list(x)) as it:
            for u in it:
                usrs[u.id] = u
        dump_pickle(usrs, usr_pickle)
    else:
        usrs = read_pickle(usr_pickle)
    return usrs
Example #21
def domain_info(ldf=None):
    if not os.path.exists(wsmall_latest_no_wlink):
        no_wlinks = ldf[ldf.href.str.contains(
            r'(www)|(http)')][~ldf.href.str.contains('wiki')]
        no_wlinks['href'] = no_wlinks['href'].apply(front_slash_nuke)
        dump_pickle(no_wlinks, wsmall_latest_no_wlink)
        if not os.path.exists(wsmall_latest_no_wlinkd):
            no_wlinks['domain'] = no_wlinks.href.map(domain_getter)
            dump_pickle(no_wlinks, wsmall_latest_no_wlinkd)
    else:
        no_wlinks = read_pickle(wsmall_latest_no_wlinkd)

    return no_wlinks
Example #22
def get_movies():
    if not os.path.exists(movie_pickle):
        def move_clean_split(line):
            return Movie(msplit.split(msanity.sub('|', line.rstrip())))

        movie_map = {}
        with codecs.open(movie_f, 'r', encoding='utf-8', errors='replace') as movs:
            for mov in map(move_clean_split, movs):
                movie_map[mov.mid] = mov
        dump_pickle(movie_map, movie_pickle)
    else:
        movie_map = read_pickle(movie_pickle)
    return movie_map
Example #23
def check_ar_outlinks(ars):
    if not os.path.exists(wsmall_statues_ar):
        result = {}
        temp = []
        processed = 0
        c = 0
        with FuturesSession(
                session=requests.Session(),
                executor=ProcessPoolExecutor(max_workers=10)) as session:
            for href in ars.href:
                temp.append(href)
                if len(temp) >= 100:
                    pending = []
                    for url in temp:
                        result[url] = -1
                        pending.append(
                            session.head(url,
                                         headers={'User-Agent': useragents[c]},
                                         timeout=5.0))
                        c += 1
                        if c == 3:
                            c = 0
                    for future in pending:
                        try:
                            response = future.result()
                            url = response.url
                            scode = response.status_code
                            result[url] = scode
                        except Exception:
                            pass  # failed request keeps its -1 timeout marker
                        processed += 1
                        if processed % 100 == 0:
                            print(processed)
                    temp.clear()
        print('outside the with')
        had_status = {'archive': [], 'status': []}
        timed_out = {'archive': [], 'status': []}
        for k, v in result.items():
            ar = archives_map(k)
            if v == -1:
                timed_out['archive'].append(ar)
                timed_out['status'].append(v)
                continue
            had_status['archive'].append(ar)
            had_status['status'].append(v)
        hs = pd.DataFrame(had_status)
        to = pd.DataFrame(timed_out)
        dump_pickle((hs, to), wsmall_statues_ar)
        return hs, to
    else:
        return read_pickle(wsmall_statues_ar)
Example #24
def get_movies2():
    if not os.path.exists(movie_pickle2):
        def move_clean_split(line):
            return Movie(msplit.split(msanity.sub('|', line.rstrip())))

        movies = []
        movie_idx = {}
        with codecs.open(movie_f, 'r', encoding='utf-8', errors='replace') as movs:
            for idx, mov in enumerate(map(move_clean_split, movs)):
                movies.append(mov)
                movie_idx[mov.mid] = idx
        dump_pickle((movies, movie_idx), movie_pickle2)
    else:
        movies, movie_idx = read_pickle(movie_pickle2)
    return movies, movie_idx
Example #25
    def getsiteheadlines(self):
        """
        Return the raw headlines (title, link and description tuples),
        reading them from the cache file or fetching them from the internet.
        """
        headlines = []
        pfile = os.path.join(self.cachedir, 'itv-%i' % self.location_index)
        if not os.path.isfile(pfile) or \
                abs(time.time() - os.path.getmtime(pfile)) > MAX_HEADLINE_AGE:
            # fresh headlines: cache is missing or older than MAX_HEADLINE_AGE
            headlines = self.fetchheadlinesfromurl()
        else:
            # cached headlines are still fresh
            headlines = util.read_pickle(pfile)
        return headlines
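The headline caches above use a purely age-based invalidation rule: regenerate whenever the pickle is older than MAX_HEADLINE_AGE seconds. Factored out, the test might look like this small sketch (is_stale is a hypothetical name for illustration):

import os
import time

def is_stale(path, max_age):
    # True when the cache file is absent or older than max_age seconds.
    return (not os.path.isfile(path)
            or abs(time.time() - os.path.getmtime(path)) > max_age)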
Example #26
def get_reviews2():
    if not os.path.exists(usr_review_pickle2):
        def review_mapper(line):
            ur = UserReview(split=usr_ratting.split(line.rstrip()))
            return ur

        def trans(rvs):
            return seq(rvs).map(review_mapper).group_by(lambda ur: ur.uid).to_dict()

        with SelectFromFile(usr_review_file, transformer=trans, selector=lambda x: x) as r:
            reviews = r
        dump_pickle(reviews, usr_review_pickle2)
    else:
        reviews = read_pickle(usr_review_pickle2)

    return reviews
Example #27
def get_reviews(movie_map):
    if not os.path.exists(usr_review_pickle):
        def review_mapper(line):
            ur = URating(usr_ratting.split(line.rstrip()))
            ur.mname = movie_map.get(ur.itemid, None)
            return ur

        def trans(rvs):
            return seq(rvs).map(review_mapper).group_by(lambda ur: ur.uid).to_dict()

        with SelectFromFile(usr_review_file, transformer=trans, selector=lambda x: x) as r:
            reviews = r
        dump_pickle(reviews, usr_review_pickle)
    else:
        reviews = read_pickle(usr_review_pickle)

    return reviews
Example #28
    def load_cache(self, filename):
        """
        load a skin cache file
        """
        if (hasattr(self, '__last_load_cache__')
                and self.__last_load_cache__[0] == filename):
            return self.__last_load_cache__[1]

        if not os.path.isfile(filename):
            return None

        cache = self.cachename(filename)
        if not cache:
            return None

        if not os.path.isfile(cache):
            return None

        version, settings = util.read_pickle(cache)
        if not settings or version != xml_skin.FXD_FORMAT_VERSION:
            return None

        pdir = os.path.join(config.SHARE_DIR, 'skins/plugins')
        if os.path.isdir(pdir):
            ffiles = util.match_files(pdir, ['fxd'])
        else:
            ffiles = []

        for f in settings.fxd_files:
            if not os.path.dirname(f).endswith(pdir):
                ffiles.append(f)

        # check if all files used by the skin are not newer than
        # the cache file
        ftime = os.stat(cache)[stat.ST_MTIME]
        for f in ffiles:
            if os.stat(f)[stat.ST_MTIME] > ftime:
                return None

        # restore the font objects
        for f in settings.font:
            settings.font[f].font = osd.getfont(settings.font[f].name,
                                                settings.font[f].size)
        self.__last_load_cache__ = filename, settings
        return settings
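The core of load_cache above is mtime-based invalidation: the pickled settings count only if no fxd file is newer than the cache file. That rule factors out into a small predicate; cache_is_fresh is a hypothetical name, and unlike the original it silently skips source files that no longer exist:

import os
import stat

def cache_is_fresh(cache, sources):
    # True when cache exists and is at least as new as every source file.
    if not os.path.isfile(cache):
        return False
    ctime = os.stat(cache)[stat.ST_MTIME]
    return all(os.stat(src)[stat.ST_MTIME] <= ctime
               for src in sources if os.path.isfile(src))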
Example #29
    def __init__(self):
        """
        init the skin engine
        """
        global skin_engine
        skin_engine = self

        self.display_style = {'menu': 0}
        self.force_redraw = True
        self.last_draw = None, None, None
        self.screen = screen.get_singleton()
        self.areas = {}

        # load default areas
        from listing_area import Listing_Area
        from tvlisting_area import TVListing_Area
        from view_area import View_Area
        from info_area import Info_Area
        from default_areas import Screen_Area, Title_Area, Subtitle_Area, Plugin_Area
        from scrollabletext_area import Scrollabletext_Area
        from textentry_area import Textentry_Area
        from buttongroup_area import Buttongroup_Area

        for a in ('screen', 'title', 'subtitle', 'view', 'listing', 'info',
                  'plugin', 'scrollabletext', 'textentry', 'buttongroup'):
            self.areas[a] = eval('%s_Area()' % a.capitalize())
        self.areas['tvlisting'] = TVListing_Area()

        self.storage_file = os.path.join(config.FREEVO_CACHEDIR,
                                         'skin-%s' % os.getuid())
        self.storage = util.read_pickle(self.storage_file)
        if self.storage:
            if not config.SKIN_XML_FILE:
                config.SKIN_XML_FILE = self.storage['SKIN_XML_FILE']
            else:
                _debug_('skin forced to %s' % config.SKIN_XML_FILE, 2)
        else:
            if not config.SKIN_XML_FILE:
                config.SKIN_XML_FILE = config.SKIN_DEFAULT_XML_FILE
            self.storage = {}

        # load the fxd file
        self.settings = xml_skin.XMLSkin()
        self.set_base_fxd(config.SKIN_XML_FILE)
Example #30
def vocab_backwards(wfile, outfile):
    w_list, w_set = read_pickle(wfile)
    toke = WordPunctTokenizer()
    vocab = set()
    vocab_count = 0
    word_count = 0
    with open(outfile, 'w+') as vout:
        vout.write('wc,vc\n')
        for wf in reversed(w_list):
            with open(wf, 'r') as wIn:
                wSoup = BeautifulSoup(wIn.read(), 'lxml')
                for token in toke.tokenize(no_wspace_punk.sub(' ', wSoup.text)):
                    if len(token) > 1:
                        word_count += 1
                        if token not in vocab:
                            vocab.add(token)
                            vocab_count += 1
                            out = '%d,%d\n' % (word_count, vocab_count)
                            vout.write(out)
Example #31
    def __init__(self, filename):
        """
        Load the file and parse it. If the file does not exist, create
        an empty <freevo> node.
        """
        Parser.__init__(self)
        self.filename = filename
        if not vfs.isfile(filename):
            self.tree = XMLnode('freevo')
        else:
            self.tree = None
            cache = vfs.getoverlay(filename + '.raw')
            if os.path.isfile(filename) and os.path.isfile(cache) and \
                   os.stat(cache)[stat.ST_MTIME] >= os.stat(filename)[stat.ST_MTIME]:
                self.tree = util.read_pickle(cache)
            if not self.tree:
                f = vfs.open(filename)
                self.tree = self.parse(f)
                f.close()
                if self.tree:
                    util.save_pickle(self.tree, cache)
Example #32
    def _buildAnalysisPage(self, builder, refresh=False):
        if not path.exists(self.schPath):
            # no schedule yet: return a bare page instead of an analysis view
            return """
                <html>
                <head>
                <link href="/css/bootstrap.min.css" rel="stylesheet">
                <link href="/css/bootstrap-theme.min.css" rel="stylesheet">
                </head>
                <body>
                    {}
                    <div class="container" role="main">
                        No Schedule.
                    </div>
                </body></html>
            """.format(htmlView.HtmlMenu().__html__())

        config, matchL, optState = read_pickle(self.schPath)
        doc = htmlView.HtmlDoc(builder(config, matchL, optState))
        if refresh:
            doc.head.add(htmlView.HtmlRefresh())
        return doc.__html__()
Example #33
def getMameRomList():
    file_ver = None
    mameRomList = None

    if os.path.isfile(config.GAMES_MAME_CACHE):
        mameRomList = util.read_pickle(config.GAMES_MAME_CACHE)

        try:
            file_ver = mameRomList.TYPES_VERSION
        except AttributeError:
            print 'The cache does not have a version and must be recreated.'

        if file_ver != mame_types.TYPES_VERSION:
            print (('MameRomList version number %s is stale (new is %s), must ' +
                    'be reloaded') % (file_ver, mame_types.TYPES_VERSION))
        else:
            logger.debug('Got MameRomList (version %s).', file_ver)

    if mameRomList is None:
        mameRomList = mame_types.MameRomList()

    print "MameRomList has %s items." % len(mameRomList.getMameRoms())
    return mameRomList
Example #34
else:
    mmpython.factory.DEBUG = 0

mmpython.USE_NETWORK = config.SYS_USE_NETWORK
mmpython.cdrom.CREATE_MD5_ID = config.MMPYTHON_CREATE_MD5_ID



# some checking when starting Freevo
if __freevo_app__ == 'main':
    try:
        import kaa.metadata.version
        import time

        cachefile = os.path.join(config.FREEVO_CACHEDIR, 'mediainfo')
        info = util.read_pickle(cachefile)
        if not info:
            print
            print 'Error: can\'t detect last cache rebuild'
            print 'Please run \'freevo cache\''
            print
            del_cache()
        else:
            if len(info) == 3:
                mmchanged, part_update, complete_update = info
                freevo_changed = 0
            else:
                mmchanged, freevo_changed, part_update, complete_update = info
            # let's warn about some updates
            if freevo_changed == 0:
                print
Example #35
        """update the cache data from the 1click service
        @note: the elocation is not updated as it is static
        """
        logger.log( 9, 'updateData()')
        if GUI:
            popup = PopupBox(text=_('Fetching Weather for %s...') % self.popupParam)
            popup.show()

        if not os.path.isfile(self.cacheElocation):
            try:
                elocationData = wget(self.url_eloc)
                self.elocationData = elocationData
            except Exception, why:
                logger.warning('Failed to get extended location data for %s: %s', self.location, why)
        else:
            self.elocationData = util.read_pickle(self.cacheElocation)

        try:
            self.currentData = wget(self.url_curc)
            #print 'currentData:', self.currentData
        except Exception, why:
            logger.warning('Failed to get the current conditions data for %s: %s', self.location, why)
            if os.path.isfile(self.cacheCurrent):
                self.currentData = util.read_pickle(self.cacheCurrent)
            else:
                self.currentData = None
        try:
            self.forecastData = wget(self.url_dayf)
            #print 'forecastData:', self.forecastData
        except Exception, why:
            logger.warning('Failed to get the forecast data for %s: %s', self.location, why)
Example #36
def get_guide(popup=False, XMLTV_FILE=None):
    """
    Get a TV guide from memory cache, file cache or raw XMLTV file.
    Tries to return at least the channels from the config file if there
    is no other data
    """
    global cached_guide

    if not XMLTV_FILE:
        XMLTV_FILE = config.XMLTV_FILE

    if popup:
        import dialog.dialogs
        popup_dialog = dialog.dialogs.ProgressDialog( _('Preparing the program guide'), indeterminate=True)
        
    # Can we use the cached version (if same as the file)?
    if (cached_guide is None or
        (os.path.isfile(XMLTV_FILE) and
         cached_guide.timestamp != os.path.getmtime(XMLTV_FILE))):

        # No, is there a pickled version ("file cache") in a file?
        pname = '%s/TV.xml.pickled' % config.FREEVO_CACHEDIR

        got_cached_guide = False
        if (os.path.isfile(XMLTV_FILE) and
            os.path.isfile(pname) and (os.path.getmtime(pname) > os.path.getmtime(XMLTV_FILE))):
            logger.debug('XMLTV, reading cached file (%s)', pname)

            if popup:
                popup_dialog.show()
                inprogress = kaa.ThreadCallable(util.read_pickle, pname)()
                inprogress.wait()
                cached_guide = inprogress.result
            else:
                cached_guide = util.read_pickle(pname)

            epg_ver = None
            try:
                epg_ver = cached_guide.EPG_VERSION
            except AttributeError:
                logger.debug('EPG does not have a version number, must be reloaded')

            if epg_ver != EPG_VERSION:
                logger.debug('EPG version mismatch, must be reloaded')

            elif cached_guide.timestamp != os.path.getmtime(XMLTV_FILE):
                # Hmmm, weird, there is a pickled file newer than the TV.xml
                # file, but the timestamp in it does not match the TV.xml
                # timestamp. We need to reload!
                logger.debug('EPG: Pickled file timestamp mismatch, reloading!')

            else:
                logger.info('XMLTV, got cached guide (version %s).', epg_ver)
                got_cached_guide = True

        if not got_cached_guide:
            # Need to reload the guide
            logger.debug('XMLTV, trying to read raw file (%s)', XMLTV_FILE)
            try:
                if popup:
                    popup_dialog.set_indeterminate(False)
                    popup_dialog.show()
                    inprogress = kaa.ThreadCallable(load_guide, XMLTV_FILE, popup_dialog)()
                    inprogress.wait()
                    cached_guide = inprogress.result
                    popup_dialog.set_indeterminate(True)
                else:
                    cached_guide = load_guide(XMLTV_FILE)
            except:
                # Don't violently crash on an incomplete or empty TV.xml please.
                cached_guide = None
                print
                print String(_("Couldn't load the TV Guide, got an exception!"))
                print
                traceback.print_exc()
            else:
                # Replace config.XMLTV_FILE before we save the pickle in order
                # to avoid timestamp confusion.
                if XMLTV_FILE != config.XMLTV_FILE:
                    logger.info('copying %r -> %r', XMLTV_FILE, config.XMLTV_FILE)
                    shutil.copyfile(XMLTV_FILE, config.XMLTV_FILE)
                    os.unlink(XMLTV_FILE)
                    cached_guide.timestamp = os.path.getmtime(config.XMLTV_FILE)

                # Dump a pickled version for later reads
                if popup:
                    kaa.ThreadCallable(util.save_pickle, cached_guide, pname)().wait()
                else:
                    util.save_pickle(cached_guide, pname)

    if not cached_guide:
        # An error occurred, return an empty guide
        cached_guide = TvGuide()

    if popup:
        popup_dialog.hide()

    return cached_guide
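Two of the examples (getMameRomList and get_guide) add a second safeguard on top of freshness: a version stamp (TYPES_VERSION, EPG_VERSION) checked after unpickling, so a format change in the code invalidates old caches instead of silently trusting them. A minimal sketch of that pattern, assuming the read_pickle/dump_pickle helpers sketched near the top of this listing:

CACHE_VERSION = 1  # bump whenever the pickled structure changes

def save_versioned(payload, path):
    # Store the payload together with the current format version.
    dump_pickle((CACHE_VERSION, payload), path)

def load_versioned(path):
    # Return the payload only while the stamp matches; a None return
    # tells the caller to rebuild the cache from scratch.
    data = read_pickle(path)
    if not data:
        return None
    version, payload = data
    return payload if version == CACHE_VERSION else None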