Example #1
def main_pkl(f_name, out_fname):
    """
    Converts the Twitter data to a pickle in which each element is a dictionary with the keys
    handle, text, and time
    """
    # Read in the file
    with open(f_name, 'r') as fid:
        out_arr = fid.read().split('\n')
    process_data = []

    # Loop over all the data
    for ele in out_arr:
        twit_split = ele.split('||')

        # Check if the data has the correct format (3 ||)
        if len(twit_split) != 4:
            logging.info('Twitter sample: {}'.format(ele))
            continue
        assert (len(twit_split[-1]) == 0)
        # Convert timestamp and add to process_data
        time_stamp = convert_timestamp(twit_split[-2])
        if time_stamp:
            process_data.append({
                'handle': twit_split[0],
                'text': twit_split[1],
                'time': time_stamp
            })
        else:
            logging.debug('Time Stamp Not Detected: {}'.format(ele))

    save_pickle({'dat': process_data}, out_fname)
    logging.info(
        'Length of raw data: {} process data: {} pickle name:{}'.format(
            len(out_arr), len(process_data), out_fname))
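All of the examples on this page call a project-specific save_pickle helper (usually paired with a load_pickle). The helpers differ between projects, and a few take their arguments in (filename, data) order instead, but a minimal sketch of the common (data, filename) variant is enough to read the snippets; the wrapper below is an assumption, not code from any of these projects:

import pickle

def save_pickle(data, filename):
    # Hypothetical helper: serialize `data` to `filename` with pickle.
    with open(filename, 'wb') as fid:
        pickle.dump(data, fid, protocol=pickle.HIGHEST_PROTOCOL)

def load_pickle(filename):
    # Hypothetical counterpart: read the pickled object back from disk.
    with open(filename, 'rb') as fid:
        return pickle.load(fid)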
Example #2
 def get_accuracy_set(self, data_set,decoder,prefix):
     data_loader = DataLoader(data_set, batch_size=self.batch_size)
     main_arr = np.array([])
     counter = 0
     self.ac_arr, self.pred_arr = [],[]
     for data in data_loader:
         # Finding the predicted label and getting the loss function
         img, label = data
         if self.CUDA_val is True:
             img = Variable(img).cuda()
         else:
             img = Variable(img)
         main_arr = np.concatenate((main_arr, np.array([self.accuracy_func(self.main_model(img), label)])))
         counter += 1
         if counter%2 == 0:
             logging.debug('Current Accuracy: {}'.format(np.mean(main_arr)))
     logging.info('Total Samples: {} Accuracy: {}'.format(data_set.__len__(), np.mean(main_arr)))
     return_dict = {'ac':self.ac_arr,'pred':self.pred_arr}
     save_pickle(return_dict,prefix+'_stats.pkl')
     if decoder is False:
         return
     # Reconstruct Image
     base_fname = self.dataset_name + "_e" + str(self.epochs) + "_b" + str(self.batch_size) + ".png"
     recons_img_fname = "recon_" + base_fname
     truth_img_fname = "truth_" + base_fname
     decoder = self.main_model.get_decoder()
     output = self.main_model(img)
     recon = decoder(output, label)
     recon_img = recon.view(-1, self.main_model.img_channel, self.main_model.img_width, self.main_model.img_height) # _, channel, width, height
     # Save Reconstruction and Ground Truth
     torchvision.utils.save_image(recon_img.cpu().data, recons_img_fname)
     torchvision.utils.save_image(img.cpu().data, truth_img_fname)
Example #3
def disc_info(media, force=False):
    """
    return kaa metadata disc information for the media
    """
    discinfo = mmpython.parse(media.devicename)
    if not discinfo or not discinfo.id:
        # bad disc, e.g. blank disc
        return {}

    cachedir  = os.path.join(config.OVERLAY_DIR, 'disc/metadata')
    cachefile = os.path.join(cachedir, discinfo.id + '.freevo')

    if os.path.isfile(cachefile):
        metainfo = util.read_pickle(cachefile)
    else:
        metainfo = {}

    if force or discinfo.mime == 'unknown/unknown' and not metainfo.has_key('disc_num_video'):
        media.mount()
        for type in ('video', 'audio', 'image'):
            items = getattr(config, '%s_SUFFIX' % type.upper())
            files = util.match_files_recursively(media.mountdir, items)
            metainfo['disc_num_%s' % type] = len(files)
        media.umount()
        util.save_pickle(metainfo, cachefile)

    info = Info(cachefile, discinfo, metainfo)
    info.disc = True
    return info
Example #4
def build_graph(ids, vocabs, pmi, tfidf):
    if util.is_exist("graph.pkl"):
        print("loaded")
        return util.load_pickle("graph.pkl")
    G = networkx.Graph()
    G.add_nodes_from(ids)
    G.add_nodes_from(vocabs)

    cn2 = lambda x: x * (x - 1) // 2
    print("Calculating word_word edges")
    for (i, w1), (j, w2) in tqdm(combinations(enumerate(vocabs), 2),
                                 total=cn2(len(vocabs))):
        if pmi[i][j] > 0:
            G.add_edge(w1, w2, weight=pmi[i][j])

    print("Calculating doc_word edges")
    for i, review_id in tqdm(enumerate(ids), total=len(ids)):
        for j, word in enumerate(vocabs):
            G.add_edge(review_id, word, weight=tfidf[i][j])

    print("Calculating doc_doc edges")
    for review_id in tqdm(ids, total=len(ids)):
        G.add_edge(review_id, review_id, weight=1)

    util.save_pickle(G, "graph.pkl")
    return G
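build_graph wires documents and vocabulary terms into one graph: word-word edges weighted by positive PMI, doc-word edges weighted by TF-IDF, and a self-loop per document. A hedged sketch of how it might be driven together with load_data (Example #5) and calc_tf_idf (Example #31); the input file name is illustrative and compute_pmi stands in for a co-occurrence/PMI step that is not shown on this page:

ids, labels, reviews = load_data("labeledTrainData.tsv")  # hypothetical input file
tfidf, vocabs = calc_tf_idf(reviews)
pmi = compute_pmi(reviews, vocabs)  # assumption: PMI matrix built in a separate step
G = build_graph(ids, vocabs, pmi, tfidf)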
Example #5
def load_data(file, max_num=None):
    if util.is_exist("data.pkl"):
        print("loaded")
        return util.load_pickle("data.pkl")
    labels = []
    reviews = []
    ids = []
    with open(file, 'r', encoding='utf-8') as f:
        lines = f.readlines()
        max_line = len(lines) if max_num is None else max_num + 1
        # Ignore Header Line
        for line in lines[1:max_line]:
            id, label, review = line.split("\t")
            labels.append(label)
            # remove quotation marks around
            review = review[1:len(review) - 1]
            reviews.append(review)
            # remove quotation marks around
            id = id[1:len(id) - 1]
            ids.append(id)

    reviews = [cleanText(r) for r in reviews]

    util.save_pickle((ids, labels, reviews), "data.pkl")
    return ids, labels, reviews
Example #6
    def toggle_display_style(self, menu):
        """
        Toggle display style
        """
        if isinstance(menu, str):
            if not self.display_style.has_key(menu):
                self.display_style[menu] = 0
            self.display_style[menu] = (self.display_style[menu] + 1) % len(self.settings.sets[menu].style)
            return 1

        if menu.force_skin_layout != -1:
            return 0

        if menu and menu.skin_settings:
            settings = menu.skin_settings
        else:
            settings = self.settings

        if settings.special_menu.has_key(menu.item_types):
            area = settings.special_menu[menu.item_types]
        else:
            area = settings.default_menu['default']

        if self.display_style['menu'] >=  len(area.style):
            self.display_style['menu'] = 0
        self.display_style['menu'] = (self.display_style['menu'] + 1) % len(area.style)

        self.storage[config.SKIN_XML_FILE] = self.display_style['menu']
        util.save_pickle(self.storage, self.storage_file)
        return 1
Example #7
    def set_base_fxd(self, name):
        """
        set the basic skin fxd file
        """
        config.SKIN_XML_FILE = os.path.splitext(os.path.basename(name))[0]
        logger.debug('load basic skin settings: %s', config.SKIN_XML_FILE)

        # try to find the skin xml file
        if not self.settings.load(name, clear=True):
            print "skin not found, using fallback skin"
            self.settings.load('basic.fxd', clear=True)

        for dir in config.cfgfilepath:
            local_skin = '%s/local_skin.fxd' % dir
            if os.path.isfile(local_skin):
                logger.log( 9, 'Skin: Add local config %s to skin', local_skin)
                self.settings.load(local_skin)
                break

        self.storage['SKIN_XML_FILE'] = config.SKIN_XML_FILE
        util.save_pickle(self.storage, self.storage_file)

        if self.storage.has_key(config.SKIN_XML_FILE):
            self.display_style['menu'] = self.storage[config.SKIN_XML_FILE]
        else:
            self.display_style['menu'] = 0
Example #8
def setup_DUC_sentences(task, parser=None, reload=False):

    ## load problems quickly from pickle file
    if (not reload) and os.path.isfile(task.data_pickle):
        sys.stderr.write('Loading [%s] problem data from [%s]\n' %
                         (task.name, task.data_pickle))
        task.problems = util.load_pickle(task.data_pickle)
        return

    ## only parse sentences if needed
    for problem in task.problems:
        print problem.id
        problem.load_documents()
        if parser:
            for doc in problem.new_docs:
                doc.parse_sentences(parser)

    if parser:
        parser.run()
        for sentence, parsetree in parser.parsed.items():
            sentence.parsed = parsetree

    ## save pickled version for faster loading later
    sys.stderr.write('Saving [%s] problem data in [%s]\n' %
                     (task.name, task.data_pickle))
    util.save_pickle(task.problems, task.data_pickle)
Example #9
    def fetchheadlinesfromurl(self):
        headlines = []

        popup = PopupBox(text=_('Fetching headlines...'))
        popup.show()
        try:
            # parse the document
            doc = util.feedparser.parse(self.url)
            if doc.status < 400:
                for entry in doc['entries']:
                    try:
                        title = Unicode(entry.title)
                        link  = Unicode(entry.link)
                        if entry.has_key('content') and len(entry['content']) >= 1:
                            description = Unicode(entry['content'][0].value)
                        else:
                            description = Unicode(entry['summary_detail'].value)
                        headlines.append((title, link, description))
                    except AttributeError:
                        pass
            else:
                logger.debug('Error %s, getting %r', doc.status, self.url)

            #write the file
            if len(headlines) > 0:
                pfile = os.path.join(self.cachedir, 'headlines-%i' % self.location_index)
                util.save_pickle(headlines, pfile)
        finally:
            popup.destroy()

        return headlines
Example #10
def setup_DUC_sentences(task, parser=None, reload=False, options=None):

    ## load problems quickly from pickle file
    if (not reload) and os.path.isfile(task.data_pickle):
        sys.stderr.write('Loading [%s] problem data from [%s]\n' %
                         (task.name, task.data_pickle))
        task.problems = util.load_pickle(task.data_pickle)
        return

    ## parse sentences
    if options:
        text.text_processor.load_splitta_model(options.splitta_model)
    else:
        text.text_processor.load_splitta_model(
            '/u/dgillick/sbd/splitta/model_nb/')
    for problem in task.problems:
        sys.stderr.write('%s\n' % problem.id)
        problem.load_documents()
        if parser:
            for doc in problem.new_docs:
                doc.parse_sentences(parser)
                problem.parsed = True

    if parser:
        parser.run()
        for sentence, parsetree in parser.parsed.items():
            sentence.parsed = parsetree

    ## save pickled version for faster loading later
    sys.stderr.write('Saving [%s] problem data in [%s]\n' %
                     (task.name, task.data_pickle))
    util.save_pickle(task.problems, task.data_pickle)
Example #11
def make_wordvec_dict(f_name, out_fname, threads):
    """
    Loads the pickle containing the dictionary gets word vector from
    parallel processing it and puts into dict saved in out_fname
    """
    # Make list of unique words
    word_list = list(load_pickle(f_name))

    # Send job to workers
    per_f = int(len(word_list) / threads) + 1
    logging.info('Per Thread {}'.format(per_f))
    pool = mp.Pool(processes=threads)
    processes = [
        pool.apply_async(parallel_word_dict,
                         args=(word_list, per_f * (x - 1), per_f * x))
        for x in range(1, threads + 1)
    ]

    # Get data and put it out
    output = [process.get() for process in processes]
    out_dict = {}
    for ele in output:
        out_dict = {**out_dict, **ele}
    pool.close()
    save_pickle(out_dict, out_fname)
    logging.info('Made Dictionary Using Spacy')
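parallel_word_dict is not shown on this page; judging from how it is called (each worker gets a slice of the word list) and from the closing log line, it presumably maps each word in word_list[start:end] to its spaCy vector. A minimal sketch under that assumption, with the model name as a further assumption:

import spacy

def parallel_word_dict(word_list, start, end):
    # Hypothetical worker: look up a spaCy vector for each word in the slice.
    nlp = spacy.load('en_core_web_md')  # assumed model that ships word vectors
    end = min(end, len(word_list))
    return {word: nlp(word).vector for word in word_list[start:end]}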
Example #12
    def __init__(self, dataPath, indexfile,
                spectype='stft', n_fft=1024, window='hann', win_length=1024, hop_length=512,
                shape='disk', neighbourhood=10, uniform=True, show=False,
                gap=50, targetsize=(200, 200)):

        # Parameters for spectrogram
        self.spectype = spectype
        self.n_fft = n_fft
        self.window = window
        self.win_length = win_length
        self.hop_length = hop_length

        # Parameters for peak picking
        self.shape = shape
        self.neighbourhood = neighbourhood
        self.uniform = uniform
        self.show = show

        # Parameters for combinatorial hashing
        self.gap = gap
        self.targetsize = targetsize

        # Stores hashed data for matching
        self.data = {}
        self.identity2title = []

        # Looping through all data and saving (assumes the whole directory is full of wavs to index)
        for identity, filename in enumerate(tqdm(os.listdir(dataPath))):
            self.identity2title.append(filename)  # Keeping track of id and title with .wav at end
            hash_dict = self.fingerprint(dataPath+'/'+filename, identity)
            self.data.update(hash_dict)

        save_pickle(self, indexfile)  # saves itself as an object with data and parameter info
Example #13
def prepare_location(loc):
    print "  Processing %s..." % loc

    data_i, xs_i, ys_i = read_location_images(loc, 'three_band')
    data_m, xs_m, ys_m = read_location_images(loc, 'sixteen_band', 'M')
    #data_p, xs_p, ys_p = read_location_images(loc, 'sixteen_band', 'P')

    # Prepare images
    for i in xrange(n_location_images):
        for j in xrange(n_location_images):
            meta = {
                'shape': (0, ys_i[i + 1] - ys_i[i], xs_i[j + 1] - xs_i[j]),
                'shape_i': (data_i.shape[0], ys_i[i + 1] - ys_i[i],
                            xs_i[j + 1] - xs_i[j]),
                'shape_m': (data_m.shape[0], ys_m[i + 1] - ys_m[i],
                            xs_m[j + 1] - xs_m[j]),
                #'shape_p': (data_p.shape[0], ys_p[i+1] - ys_p[i], xs_p[j+1] - xs_p[j])
            }

            save_pickle('cache/meta/%s_%d_%d.pickle' % (loc, i, j), meta)

    write_location_images(loc, data_i, xs_i, ys_i, 'I')
    write_location_images(loc, data_m, xs_m, ys_m, 'M')
    #write_location_images(loc, data_p, xs_p, ys_p, 'P')

    write_location_images(loc, normalize(data_m), xs_m, ys_m,
                          'MN')  # Write location-normalized M channels

    write_location_images(loc, compute_filters(data_i), xs_i, ys_i, 'IF')
    write_location_images(loc, compute_indices(data_m), xs_m, ys_m, 'MI')
Example #15
    def fetchheadlinesfromurl(self):
        headlines = []

        popup = PopupBox(text=_('Fetching headlines...'))
        popup.show()
        try:
            # parse the document
            doc = util.feedparser.parse(self.url)
            if doc.status < 400:
                for entry in doc['entries']:
                    try:
                        title = Unicode(entry.title)
                        link = Unicode(entry.link)
                        if entry.has_key('content') and len(
                                entry['content']) >= 1:
                            description = Unicode(entry['content'][0].value)
                        else:
                            description = Unicode(
                                entry['summary_detail'].value)
                        headlines.append((title, link, description))
                    except AttributeError:
                        pass
            else:
                logger.debug('Error %s, getting %r', doc.status, self.url)

            #write the file
            if len(headlines) > 0:
                pfile = os.path.join(self.cachedir,
                                     'headlines-%i' % self.location_index)
                util.save_pickle(headlines, pfile)
        finally:
            popup.destroy()

        return headlines
Example #16
def preprocess():
    if util.is_exist("preprocessed.pkl"):
        print("loading")
        return util.load_pickle("preprocessed.pkl")
    _, labels, _ = util.load_pickle("data.pkl")
    labels = np.array(labels, dtype=np.int32)
    G = util.load_pickle("graph.pkl")

    print("calc adjacent matrix")
    A = networkx.to_numpy_matrix(G, weight="weight")

    print("calc degree matrix")
    degrees = [d**-0.5 if d != 0 else 0 for _, d in G.degree]

    print("normalize adjacent matrix")
    '''
    degrees = np.diag(degrees)
    A_hat = degrees @ A @ degrees
    '''
    # decrease memory allocation
    A_hat = A
    for i in tqdm(range(A.shape[0]), total=A.shape[0]):
        for j in range(A.shape[1]):
            A_hat[i, j] *= degrees[i] * degrees[j]

    print("calc feature matrix")
    X = np.eye(G.number_of_nodes())  # Features are just identity matrix
    util.save_pickle((X, A_hat, labels), "preprocessed.pkl")
    return X, A_hat, labels
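The element-wise loop above computes the symmetric normalization D^-1/2 A D^-1/2 without materializing the diagonal degree matrix. If A fits in memory as a dense array, the same result can be obtained with broadcasting (a sketch, assuming degrees already holds the d ** -0.5 values exactly as computed above):

import numpy as np

d = np.asarray(degrees)                            # shape (n,), entries d_i ** -0.5
A_hat = np.asarray(A) * d[:, None] * d[None, :]    # equivalent to the double loop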
Example #17
def setup_DUC_sentences(task, parser=None, reload=False):

    ## load problems quickly from pickle file
    if (not reload) and os.path.isfile(task.data_pickle):
        sys.stderr.write('Loading [%s] problem data from [%s]\n' %(task.name, task.data_pickle))
        task.problems = util.load_pickle(task.data_pickle)
        return

    ## parse sentences
    text.text_processor.load_splitta_model('/u/dgillick/sbd/splitta/model_nb/')
    for problem in task.problems:
        sys.stderr.write('%s\n' %problem.id)
        problem.load_documents()
        if parser:
            for doc in problem.new_docs:
                doc.parse_sentences(parser)
                problem.parsed = True
                
    if parser:
        parser.run()
        for sentence, parsetree in parser.parsed.items():
            sentence.parsed = parsetree
        
    ## save pickled version for faster loading later
    sys.stderr.write('Saving [%s] problem data in [%s]\n' %(task.name, task.data_pickle))
    util.save_pickle(task.problems, task.data_pickle)
Example #18
def delete_old_files_2():
    """
    delete cache files/entries for files which don't exist anymore
    """
    print checking('deleting old web-server thumbnails'),
    sys.__stdout__.flush()
    num = 0
    for file in util.match_files_recursively(vfs.www_image_cachedir(),
                                             config.IMAGE_SUFFIX):
        if not vfs.isfile(
                file[len(vfs.www_image_cachedir()):file.rindex('.')]):
            os.unlink(file)
            num += 1
    print 'deleted %s file%s' % (num, num != 1 and 's' or '')

    print checking('deleting old cache files'),
    sys.__stdout__.flush()
    num = 0
    for file in util.match_files_recursively(config.OVERLAY_DIR, ['raw']):
        if file.startswith(os.path.join(config.OVERLAY_DIR, 'disc')):
            continue
        if not vfs.isfile(file[len(config.OVERLAY_DIR):-4]):
            os.unlink(file)
            num += 1
    print 'deleted %s file%s' % (num, num != 1 and 's' or '')

    print checking('deleting cache for directories not existing anymore'),
    subdirs = util.get_subdirs_recursively(config.OVERLAY_DIR)
    subdirs.reverse()
    for file in subdirs:
        if not os.path.isdir(file[len(config.OVERLAY_DIR):]) and not \
                file.startswith(os.path.join(config.OVERLAY_DIR, 'disc')):
            for metafile in ('cover.png', 'cover.png.raw', 'cover.jpg',
                             'cover.jpg.raw', 'mmpython.cache',
                             'freevo.cache'):
                if os.path.isfile(os.path.join(file, metafile)):
                    os.unlink(os.path.join(file, metafile))
            if not os.listdir(file):
                os.rmdir(file)
    print 'done'

    print checking('deleting old entries in meta-info'),
    sys.__stdout__.flush()
    for filename in util.recursefolders(config.OVERLAY_DIR, 1, 'freevo.cache',
                                        1):
        if filename.startswith(os.path.join(config.OVERLAY_DIR, 'disc')):
            continue
        sinfo = os.stat(filename)
        if not sinfo[ST_SIZE]:
            #print '%s is empty' % filename
            continue
        dirname = os.path.dirname(filename)[len(config.OVERLAY_DIR):]
        data = util.read_pickle(filename)
        for key in copy.copy(data):
            if not os.path.exists(os.path.join(dirname, str(key))):
                del data[key]
        util.save_pickle(data, filename)
    print 'done'
Example #19
    def fetchheadlinesfromurl(self):
        """
        this fetches the headlines (title, link and description) from the url.
        Here the parsing of the xml is done
        """
        headlines = []
        # create Reader object
        reader = Sax2.Reader()

        popup = dialog.show_working_indicator(_('Fetching headlines...'))
        

        # parse the document
        try:
            myfile=urllib.urlopen(self.url)
            doc = reader.fromStream(myfile)
            items = doc.getElementsByTagName('item')
            for item in items:
                title = ''
                link  = ''
                description = ''

                if item.hasChildNodes():
                    for c in item.childNodes:
                        if c.localName == 'title':
                            title = c.firstChild.data
                        if c.localName == 'description':
                            description = c.firstChild.data
                        #################################
                        # Addition to identify the video link
                        if self.mode == 'youtube':
                            if c.localName == 'link':
                                link='youtube:'+c.firstChild.data
                        else:
                            if c.localName == 'enclosure':
                                attrs = c.attributes
                                for attrName in attrs.keys():
                                    attrNode = attrs.get(attrName)
                                    attrValue = attrNode.nodeValue
                                    if 'url' in attrName:
                                        link = attrValue

                if title:
                    headlines.append((title, link, description))

        except:
            #unreachable or url error
            logger.error('could not open %s', self.url)
            pass

        #write the file
        if len(headlines) > 0:
            pfile = os.path.join(self.cachedir, 'itv-%i' % self.location_index)
            util.save_pickle(headlines, pfile)

        popup.hide()
        return headlines
Example #20
def del_cache():
    """
    delete all cache files because kaa metadata got updated
    """
    for f in util.recursefolders(config.OVERLAY_DIR,1,'mmpython.cache',1):
        os.unlink(f)
    for f in util.match_files(config.OVERLAY_DIR + '/disc/metadata', ['mmpython']):
        os.unlink(f)
    cachefile = os.path.join(config.FREEVO_CACHEDIR, 'mediainfo')
    util.save_pickle((mmpython.version.VERSION, 0, 0, 0), cachefile)
Example #21
 def save_cache(self):
     """
     save a modified cache file
     """
     if self.cache_modified:
         logger.log( 9, 'save cache %s', self.current_cachefile)
         util.save_pickle(self.current_objects, self.current_cachefile)
         self.cache_modified = False
         if config.MEDIAINFO_USE_MEMORY:
             self.all_directories[self.current_cachefile] = self.current_objects
Example #24
 def saveToCache(self):
     util.save_pickle(self.weatherData, self.cacheFile)
     # attempt to save weathermap
     try:
         if self.weatherMapData is not None:
             imgfd = os.open(self.mapFile, os.O_CREAT|os.W_OK)
             os.write(imgfd, self.weatherMapData)
             os.close(imgfd)
     except:
         print "failed while saving weather map to cache '%s'" % (self.mapFile,)
Example #26
def delete_old_files_2():
    """
    delete cache files/entries for files which don't exist anymore
    """
    print checking('deleting old web-server thumbnails'),
    sys.__stdout__.flush()
    num = 0
    for file in util.match_files_recursively(vfs.www_image_cachedir(), config.IMAGE_SUFFIX):
        if not vfs.isfile(file[len(vfs.www_image_cachedir()):file.rindex('.')]):
            os.unlink(file)
            num += 1
    print 'deleted %s file%s' % (num, num != 1 and 's' or '')

    print checking('deleting old cache files'),
    sys.__stdout__.flush()
    num = 0
    for file in util.match_files_recursively(config.OVERLAY_DIR, ['raw']):
        if file.startswith(os.path.join(config.OVERLAY_DIR, 'disc')):
            continue
        if not vfs.isfile(file[len(config.OVERLAY_DIR):-4]):
            os.unlink(file)
            num += 1
    print 'deleted %s file%s' % (num, num != 1 and 's' or '')

    print checking('deleting cache for directories not existing anymore'),
    subdirs = util.get_subdirs_recursively(config.OVERLAY_DIR)
    subdirs.reverse()
    for file in subdirs:
        if not os.path.isdir(file[len(config.OVERLAY_DIR):]) and not \
                file.startswith(os.path.join(config.OVERLAY_DIR, 'disc')):
            for metafile in ('cover.png', 'cover.png.raw', 'cover.jpg', 'cover.jpg.raw', 'mmpython.cache',
                    'freevo.cache'):
                if os.path.isfile(os.path.join(file, metafile)):
                    os.unlink(os.path.join(file, metafile))
            if not os.listdir(file):
                os.rmdir(file)
    print 'done'

    print checking('deleting old entries in meta-info'),
    sys.__stdout__.flush()
    for filename in util.recursefolders(config.OVERLAY_DIR, 1, 'freevo.cache', 1):
        if filename.startswith(os.path.join(config.OVERLAY_DIR, 'disc')):
            continue
        sinfo = os.stat(filename)
        if not sinfo[ST_SIZE]:
            #print '%s is empty' % filename
            continue
        dirname = os.path.dirname(filename)[len(config.OVERLAY_DIR):]
        data    = util.read_pickle(filename)
        for key in copy.copy(data):
            if not os.path.exists(os.path.join(dirname, str(key))):
                del data[key]
        util.save_pickle(data, filename)
    print 'done'
Example #27
def get_docs(task, num_docs, reload=False):
    """
    returns a new task, where each problem in task.problems has:
    problem.ir_docs = [ ... ]
    """

    ## check state
    if not reload and framework.check_state(task.problems)['ir']:
        sys.stderr.write('already have ir documents loaded\n')
        return task

    max_files = 0

    ## get all query tokens; use tfidf.get_tokens because this matches the index's tokenization
    queries_by_problem_id = {}
    for problem in task.problems:
        #curr_query = ' '.join(tfidf.get_tokens(problem.query.original))
        curr_query = ' '.join(make_query(problem))
        queries_by_problem_id[problem.id] = curr_query

    ## do the search
    all_queries = queries_by_problem_id.values()
    docs_by_query = tfidf.search(tfidf.file_index_pickle_path, all_queries,
                                 tfidf.search_cmd, max_files, num_docs)

    ## for debugging
    docfh = open('irdoc_debug', 'w')

    ## allocate docs to problems
    for problem in task.problems:
        query = queries_by_problem_id[problem.id]
        docs_with_values = docs_by_query[query]

        ## inspect values for debugging
        docfh.write('# problem [%s]\n' % problem.id)
        for doc, val in docs_with_values:
            docfh.write('## doc_id [%s]  value [%1.4f]\n' %
                        (doc.id, float(val)))
            for par in doc.paragraphs:
                docfh.write('%s\n' % par)

        ## sentence segmentation
        docs = [doc for doc, val in docs_with_values]
        for doc in docs:
            doc.get_sentences()

        problem.ir_docs = docs
        problem.loaded_ir_docs = True

    ## pickle it up
    sys.stderr.write('Saving [%s] problem data in [%s]\n' %
                     (task.name, task.data_pickle))
    util.save_pickle(task.problems, task.data_pickle)
    return task
Example #28
 def save(self):
     dict = {'datasets': self.datasets,'version': 0.1} 
     
     #for now: make a backup first:
     database_filename = self.path+'/'+self.filename
     backup_filename = self.path+'/'+self.filename+'_backup_'+ut.formatted_time()
     print 'Backing up old database to ' + backup_filename       
     shutil.copy(database_filename, backup_filename)
     
     print "Saving: "+database_filename
     ut.save_pickle(dict,database_filename)    
Example #29
    def save(self, save_dir):
        model_params = (self.ob_size, self.actions, self.device, self.gamma,
                        self.memory_size, self.layer_sizes, self.adv_sizes,
                        self.val_sizes, self.double, self.dueling)
        util.save_pickle(os.path.join(save_dir, 'params.pickle'), model_params)

        torch.save(self.policy_net.state_dict(),
                   os.path.join(save_dir, 'policy_net.pt'))

        util.save_pickle(os.path.join(save_dir, 'memory.pickle'), self.memory)

        logger.info(f'Model saved to {save_dir}')
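Note that this project's util.save_pickle takes (path, object), the reverse of the Freevo-style helpers in the other examples. No load routine is shown; a hedged sketch of one, assuming a util.load_pickle(path) counterpart exists:

import os
import torch

def load(save_dir, policy_net):
    # Hypothetical mirror of save(): restore params, network weights and replay memory.
    model_params = util.load_pickle(os.path.join(save_dir, 'params.pickle'))
    policy_net.load_state_dict(torch.load(os.path.join(save_dir, 'policy_net.pt')))
    memory = util.load_pickle(os.path.join(save_dir, 'memory.pickle'))
    return model_params, memory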
Example #30
def read_and_process(args, tokenizer, dataset_dict, dir_name, dataset_name, split):
    #TODO: cache this if possible
    cache_path = f'{dir_name}/{dataset_name}_encodings.pt'
    if os.path.exists(cache_path) and not args.recompute_features:
        tokenized_examples = util.load_pickle(cache_path)
    else:
        if split=='train':
            tokenized_examples = prepare_train_data(dataset_dict, tokenizer)
        else:
            tokenized_examples = prepare_eval_data(dataset_dict, tokenizer)
        util.save_pickle(tokenized_examples, cache_path)
    return tokenized_examples
Example #31
def calc_tf_idf(reviews, min_df=0.01):
    if util.is_exist("tf-idf.pkl"):
        print("loaded")
        return util.load_pickle("tf-idf.pkl")
    vectorizer = TfidfVectorizer(input="content",
                                 stop_words=stopwords.words("english"),
                                 min_df=min_df,
                                 max_df=0.5)
    vectorizer.fit(reviews)
    tfidf = vectorizer.transform(reviews).toarray()
    vocab = vectorizer.get_feature_names()
    util.save_pickle((tfidf, vocab), "tf-idf.pkl")
    return tfidf, vocab
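TfidfVectorizer.get_feature_names was deprecated in scikit-learn 1.0 and removed in 1.2; on a newer installation the equivalent call would be:

vocab = vectorizer.get_feature_names_out()  # scikit-learn >= 1.0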
Example #32
def make_dict_pickle(f_name, out_fname):
    """
    Cleans the tweets and makes a set of all the words
    """
    logging.info('Making pickle for the dictionary')
    word_set = set()
    for tweet in load_pickle(f_name)['dat']:
        words, _ = clean_tweet(tweet['text'])
        for word in words:
            word_set.add(word)
    logging.info('Number unique words: {}'.format(len(word_set)))
    save_pickle(word_set, out_fname)
    logging.info('Saved dictionary to: {}'.format(out_fname))
Example #33
def get_dat():
    try:
        config = configparser.ConfigParser()
        config.read("tweepy_config")
        auth = tweepy.OAuthHandler(config['s1']['key'], config['s1']['secret'])
        auth.set_access_token(config['s1']['token'],
                              config['s1']['token_secret'])
        api = tweepy.API(auth)
        tweets = [(tweet.author.screen_name, tweet.text)
                  for tweet in api.search(q='bitcoin', count=300, lang='en')]
        save_pickle({'dat': tweets}, "new.pkl")
        return tweets
    except KeyError:
        print('Extraction of tweets did not work')
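API.search was renamed in Tweepy 4.0, and the standard search endpoint caps count at 100 per request; assuming a Tweepy >= 4.0 installation, the equivalent call would be:

tweets = [(tweet.author.screen_name, tweet.text)
          for tweet in api.search_tweets(q='bitcoin', count=100, lang='en')]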
Example #34
File: ir.py Project: DrDub/icsisumm
def get_docs(task, num_docs, reload=False):
    """
    returns a new task, where each problem in task.problems has:
    problem.ir_docs = [ ... ]
    """
    
    ## check state
    if not reload and framework.check_state(task.problems)['ir']:
        sys.stderr.write('already have ir documents loaded\n')
        return task
    
    max_files = 0

    ## get all query tokens; use tfidf.get_tokens because this matches the index's tokenization
    queries_by_problem_id = {}
    for problem in task.problems:
        #curr_query = ' '.join(tfidf.get_tokens(problem.query.original))
        curr_query = ' '.join(make_query(problem))
        queries_by_problem_id[problem.id] = curr_query
        
    ## do the search
    all_queries = queries_by_problem_id.values()
    docs_by_query = tfidf.search(tfidf.file_index_pickle_path, all_queries, tfidf.search_cmd, max_files, num_docs)

    ## for debugging
    docfh = open('irdoc_debug', 'w')
    
    ## allocate docs to problems
    for problem in task.problems:
        query = queries_by_problem_id[problem.id]
        docs_with_values = docs_by_query[query]

        ## inspect values for debugging
        docfh.write('# problem [%s]\n' %problem.id)
        for doc, val in docs_with_values:
            docfh.write('## doc_id [%s]  value [%1.4f]\n' %(doc.id, float(val)))
            for par in doc.paragraphs:
                docfh.write('%s\n' %par)

        ## sentence segmentation
        docs = [doc for doc, val in docs_with_values]
        for doc in docs: doc.get_sentences()
        
        problem.ir_docs = docs
        problem.loaded_ir_docs = True
        
    ## pickle it up
    sys.stderr.write('Saving [%s] problem data in [%s]\n' %(task.name, task.data_pickle))
    util.save_pickle(task.problems, task.data_pickle)
    return task
Example #35
 def save_cache(self, settings, filename):
     """
     cache the fxd skin settings in 'settings' to the OVERLAY_DIR cache file
     for filename and this resolution
     """
     cache = self.cachename(filename)
     if cache:
         # delete font object, because it can't be pickled
         for f in settings.font:
             del settings.font[f].font
         # save object and version information
         util.save_pickle((xml_skin.FXD_FORMAT_VERSION, settings), cache)
         # restore font object
         for f in settings.font:
             settings.font[f].font = osd.getfont(settings.font[f].name, settings.font[f].size)
Example #36
 def store(self, key, value):
     """
     store the key/value in metadata and save the cache
     """
     self.metadata[key] = value
     if self.disc:
         self.metadata[key] = value
         util.save_pickle(self.metadata, self.filename)
         return True
     elif not self.filename:
         return False
     else:
         meta_cache.set(os.path.basename(self.filename), os.path.dirname(self.filename),
             self.filename, self.metadata)
         return True
Example #38
 def delete(self, key):
     """
     delete the key in metadata and save the cache
     """
     if self.disc:
         if self.metadata.has_key(key):
             del self.metadata[key]
             util.save_pickle(self.metadata, self.filename)
         return True
     elif not self.filename:
         return False
     if self.metadata.has_key(key):
         del self.metadata[key]
         meta_cache.set(os.path.basename(self.filename), os.path.dirname(self.filename),
             self.filename, self.metadata)
         return True
Example #40
 def train_punkt_model(self, text, save_path=None):
     """
     unsupervised training given some text
     optional save_path for future use
     """
     
     ## train tokenizer
     sys.stderr.write('Training...\n')
     t = nltk.tokenize.punkt.PunktSentenceTokenizer()
     t.ABBREV = 0.1  # threshold for identifying abbrevs (lower is more aggressive)
     t.train(text)
     self._sent_tokenizer = t
     
     ## pickle it
     if save_path:
         util.save_pickle(t, save_path)
         sys.stderr.write('Saved model as [%s]\n' % save_path)
Example #41
    def train_punkt_model(self, text, save_path=None):
        """
        unsupervised training given some text
        optional save_path for future use
        """

        ## train tokenizer
        sys.stderr.write('Training...\n')
        t = nltk.tokenize.punkt.PunktSentenceTokenizer()
        t.ABBREV = 0.1  # threshold for identifying abbrevs (lower is more aggressive)
        t.train(text)
        self._sent_tokenizer = t

        ## pickle it
        if save_path:
            util.save_pickle(t, save_path)
            sys.stderr.write('Saved model as [%s]\n' % save_path)
Example #42
def build_model(files, options):

    ## create a Doc object from some labeled data
    train_corpus = get_data(files, tokenize=options.tokenize)

    ## create a new model
    if options.svm: model = SVM_Model(train_corpus, options.model_path)
    else: model = NB_Model(train_corpus, options.model_path)

    ## featurize the training corpus
    train_corpus.featurize(model)

    ## run the model's training routine
    model.train(train_corpus)

    ## save the model
    util.save_pickle(model, options.model_path + 'model.pkl')
    return model
Example #43
    def save(self, filename=None):
        """
        Save the tree
        """
        if not filename:
            filename = self.filename
        if vfs.isfile(filename):
            vfs.unlink(filename)
        f = vfs.codecs_open(filename, 'wb', encoding='utf-8')
        f.write('<?xml version="1.0" encoding="utf-8" ?>\n')
        self._dump_recurse(f, self.tree)

        f.write('\n')
        f.close()

        f = vfs.open(filename)
        self.tree = self.parse(f)
        f.close()
        if self.tree:
            util.save_pickle(self.tree, vfs.getoverlay(filename + '.raw'))
Example #44
 def __init__(self, filename):
     """
     Load the file and parse it. If the file does not exist, create
     an empty <freevo> node.
     """
     Parser.__init__(self)
     self.filename = filename
     if not vfs.isfile(filename):
         self.tree = XMLnode('freevo')
     else:
         self.tree = None
         cache = vfs.getoverlay(filename + '.raw')
         if os.path.isfile(filename) and os.path.isfile(cache) and \
                os.stat(cache)[stat.ST_MTIME] >= os.stat(filename)[stat.ST_MTIME]:
             self.tree = util.read_pickle(cache)
         if not self.tree:
             f = vfs.open(filename)
             self.tree = self.parse(f)
             f.close()
             if self.tree:
                 util.save_pickle(self.tree, cache)
Example #45
def setup_DUC_sentences(task, parser=None, reload=False):

    ## load problems quickly from pickle file
    if (not reload) and os.path.isfile(task.data_pickle):
        sys.stderr.write('Loading [%s] problem data from [%s]\n' %(task.name, task.data_pickle))
        task.problems = util.load_pickle(task.data_pickle)
        return

    ## only parse sentences if needed
    for problem in task.problems:
        print problem.id
        problem.load_documents()
        if parser:
            for doc in problem.new_docs:
                doc.parse_sentences(parser)
                
    if parser:
        parser.run()
        for sentence, parsetree in parser.parsed.items():
            sentence.parsed = parsetree
        
    ## save pickled version for faster loading later
    sys.stderr.write('Saving [%s] problem data in [%s]\n' %(task.name, task.data_pickle))
    util.save_pickle(task.problems, task.data_pickle)
Example #46
def read_test_data(file):

    # assume if one is saved they all are
    if util.check_file_exists(CONST.DATASET_PATH + CONST.TEST_PATH):
        T_Data = util.load(CONST.DATASET_PATH + CONST.TEST_PATH)
        T_Labels = util.load(CONST.DATASET_PATH + CONST.TEST_PATH_LABELS)
        T_Queries = util.load(CONST.DATASET_PATH + CONST.TEST_PATH_Q)
        T_Docs = util.load(CONST.DATASET_PATH + CONST.TEST_PATH_DOCS)

    else:
        T_Data, T_Labels, T_Queries, T_Docs = read_train_data(file)

        util.save_pickle(CONST.DATASET_PATH + CONST.TEST_PATH, T_Data)
        util.save_pickle(CONST.DATASET_PATH + CONST.TEST_PATH_LABELS, T_Labels)
        util.save_pickle(CONST.DATASET_PATH + CONST.TEST_PATH_Q, T_Queries)
        util.save_pickle(CONST.DATASET_PATH + CONST.TEST_PATH_DOCS, T_Docs)

    return T_Data, T_Labels, T_Queries, T_Docs
Example #47
    def prepare(self, features_k_nearest_neighbors, nonzero_indices = None, all_save_load = False, regenerate_neightborhood_indices = False):
        #print np.shape(self.processor.pts3d_bound), 'shape pts3d_bound'

        imgTmp = cv.cvCloneImage(self.processor.img)
        self.imNP = ut.cv2np(imgTmp,format='BGR')
        ###self.processor.map2d = np.asarray(self.processor.camPts_bound) #copied from laser to image mapping
        
        if features_k_nearest_neighbors == None or features_k_nearest_neighbors == False: #use range
            self.kdtree2d = kdtree.KDTree(self.processor.pts3d_bound.T)
            
            #print len(nonzero_indices)
            #print np.shape(np.asarray((self.processor.pts3d_bound.T)[nonzero_indices]))
            
            if nonzero_indices != None:
                print ut.getTime(), 'query ball tree for ', len(nonzero_indices), 'points'
                kdtree_query = kdtree.KDTree((self.processor.pts3d_bound.T)[nonzero_indices])
            else:
                print ut.getTime(), 'query ball tree'
                kdtree_query = kdtree.KDTree(self.processor.pts3d_bound.T)
            
            filename = self.processor.config.path+'/data/'+self.processor.scan_dataset.id+'_sphere_neighborhood_indices_'+str(self.processor.feature_radius)+'.pkl'
            if all_save_load == True and os.path.exists(filename) and regenerate_neightborhood_indices == False:
                #if its already there, load it:
                print ut.getTime(), 'loading',filename
                self.kdtree_queried_indices = ut.load_pickle(filename)    
            else:
                self.kdtree_queried_indices = kdtree_query.query_ball_tree(self.kdtree2d, self.processor.feature_radius, 2.0, 0.2) #approximate
                print ut.getTime(), 'queried kdtree: ',len(self.kdtree_queried_indices),'points, radius:',self.processor.feature_radius
                if all_save_load == True:
                    ut.save_pickle(self.kdtree_queried_indices, filename)
                    
            #make dict out of list for faster operations? (doesn't seem to change speed significantly):
            #self.kdtree_queried_indices = dict(zip(xrange(len(self.kdtree_queried_indices)), self.kdtree_queried_indices))
        
        else: #experimental: use_20_nearest_neighbors == True
            #TODO: exclude invalid values in get_featurevector (uncomment code there)
           
            self.kdtree2d = kdtree.KDTree(self.processor.pts3d_bound.T)
            self.kdtree_queried_indices = []
            print ut.getTime(), 'kdtree single queries for kNN start, k=', features_k_nearest_neighbors
            count = 0
            for point in ((self.processor.pts3d_bound.T)[nonzero_indices]):
                count = count + 1
                result = self.kdtree2d.query(point, features_k_nearest_neighbors,0.2,2,self.processor.feature_radius)
                #existing = result[0][0] != np.Inf
                #print existing
                #print result[1]
                self.kdtree_queried_indices += [result[1]] #[existing]
                if count % 4096 == 0:
                    print ut.getTime(),count
            print ut.getTime(), 'kdtree single queries end'
            
            #convert to numpy array -> faster access
            self.kdtree_queried_indices = np.asarray(self.kdtree_queried_indices)
        
        #print self.kdtree_queried_indices
        #takes long to compute:
        #avg_len = 0
        #minlen = 999999
        #maxlen = 0
        #for x in self.kdtree_queried_indices:
        #    avg_len += len(x)
        #    minlen = min(minlen, len(x))
        #    maxlen = max(maxlen, len(x))
        #avg_len = avg_len / len(self.kdtree_queried_indices)
        #print ut.getTime(), "range neighbors: avg_len", avg_len, 'minlen', minlen, 'maxlen', maxlen
        
        
        #create HSV numpy images:
        # compute the hsv version of the image 
        image_size = cv.cvGetSize(self.processor.img)
        img_h = cv.cvCreateImage (image_size, 8, 1)
        img_s = cv.cvCreateImage (image_size, 8, 1)
        img_v = cv.cvCreateImage (image_size, 8, 1)
        img_hsv = cv.cvCreateImage (image_size, 8, 3)
        
        cv.cvCvtColor (self.processor.img, img_hsv, cv.CV_BGR2HSV)
        
        cv.cvSplit (img_hsv, img_h, img_s, img_v, None)
        self.imNP_h = ut.cv2np(img_h)
        self.imNP_s = ut.cv2np(img_s)
        self.imNP_v = ut.cv2np(img_v)
        
        textures = texture_features.eigen_texture(self.processor.img)
        self.imNP_tex1 = textures[:,:,0]
        self.imNP_tex2 = textures[:,:,1]
        
        self.debug_before_first_featurevector = True
        
        self.generate_voi_histogram(self.processor.point_of_interest,self.processor.voi_width)
Example #48
 def saveToCache(self):
     logger.log( 9, 'saveToCache()')
     util.save_pickle(self.elocationData, self.cacheElocation)
     util.save_pickle(self.currentData, self.cacheCurrent)
     util.save_pickle(self.forecastData, self.cacheForecast)
Example #49
 def cacheData(self, name, data):
     if self.filename == "remote":
         return None
     save_pickle(self.filename + "." + name, data)
Example #50
 def write(self, filename):
   util.save_pickle(self.d, filename)
Example #51
 def save(self):
     util.save_pickle(self.featdict, self.path + 'feats')
Example #52
    text.text_processor.load_splitta_model('lib/splitta/model_nb/')
    # Skip update data
    if task_name[:3] == 'tac':
      framework.setup_TAC08(task, True)
    elif task_name[:3] == 'duc':
      framework.setup_DUC_basic(task, True)
    elif task_name[:3] == 'new':
      framework.setup_news(task)
    else:
      raise Exception('Unknown task %s' % task)
    if task_name[:3] != 'new':
      for problem in task.problems:
        problem.load_documents()
    ## save pickled version for faster loading later
    sys.stderr.write('Saving [%s] problem data in [%s]\n' %(task.name, task.data_pickle))
    util.save_pickle(task.problems, task.data_pickle)

  # Tokenize for parser
  tokenizer = nltk.tokenize.treebank.TreebankWordTokenizer()

  for problem in task.problems:
    num_sents = sum([len(doc.sentences) for doc in problem.new_docs])
    sys.stderr.write("%s %d %d\n" % (problem.id, len(problem.new_docs), num_sents))

    if args.boundary:
      with open('%s/%s.sentid' % (sent_path, problem.id), 'r') as fin, \
          open('%s/%s.boundary' % (sent_path, problem.id), 'w') as fout:
        sent_ids = [int(s.strip()) for s in fin.readlines()]
        j = 0   # sent_ids index
        for doc in problem.new_docs:
          new_doc = True
Example #53
            if d == os.path.sep:
                print 'ERROR: %s_ITEMS contains root directory, skipped.' % type
                setattr(config, '%s_ITEMS' % type, [])

    if os.path.isdir(os.path.join(config.FREEVO_CACHEDIR, 'playlists')):
        config.AUDIO_ITEMS.append(('Playlists', os.path.join(config.FREEVO_CACHEDIR, 'playlists')))
    delete_old_files_1()
    delete_old_files_2()

    # we have time here, don't use exif thumbnails
    config.IMAGE_USE_EXIF_THUMBNAIL = 0

    cache_directories(rebuild)
    if config.CACHE_IMAGES:
        cache_thumbnails()
    create_metadata()
    create_tv_pickle()

# close db
util.mediainfo.sync()

# save cache info
try:
    import kaa.metadata.version
    util.save_pickle((kaa.metadata.version.VERSION, VERSION, int(time.time()), complete_update), cachefile)
except ImportError:
    print 'WARNING: please update kaa.metadata'

print
print 'caching complete after %s seconds' % (time.clock() - start)
Example #54
def get_guide(popup=False, XMLTV_FILE=None):
    """
    Get a TV guide from memory cache, file cache or raw XMLTV file.
    Tries to return at least the channels from the config file if there
    is no other data
    """
    global cached_guide

    if not XMLTV_FILE:
        XMLTV_FILE = config.XMLTV_FILE

    if popup:
        import dialog.dialogs
        popup_dialog = dialog.dialogs.ProgressDialog( _('Preparing the program guide'), indeterminate=True)
        
    # Can we use the cached version (if same as the file)?
    if (cached_guide == None or
        (os.path.isfile(XMLTV_FILE) and
         cached_guide.timestamp != os.path.getmtime(XMLTV_FILE))):

        # No, is there a pickled version ("file cache") in a file?
        pname = '%s/TV.xml.pickled' % config.FREEVO_CACHEDIR

        got_cached_guide = False
        if (os.path.isfile(XMLTV_FILE) and
            os.path.isfile(pname) and (os.path.getmtime(pname) > os.path.getmtime(XMLTV_FILE))):
            logger.debug('XMLTV, reading cached file (%s)', pname)

            if popup:
                popup_dialog.show()
                inprogress = kaa.ThreadCallable(util.read_pickle, pname)()
                inprogress.wait()
                cached_guide = inprogress.result
            else:
                cached_guide = util.read_pickle(pname)

            epg_ver = None
            try:
                epg_ver = cached_guide.EPG_VERSION
            except AttributeError:
                logger.debug('EPG does not have a version number, must be reloaded')

            if epg_ver != EPG_VERSION:
                logger.debug('EPG version mismatch, must be reloaded')

            elif cached_guide.timestamp != os.path.getmtime(XMLTV_FILE):
                # Hmmm, weird, there is a pickled file newer than the TV.xml
                # file, but the timestamp in it does not match the TV.xml
                # timestamp. We need to reload!
                logger.debug('EPG: Pickled file timestamp mismatch, reloading!')

            else:
                logger.info('XMLTV, got cached guide (version %s).', epg_ver)
                got_cached_guide = True

        if not got_cached_guide:
            # Need to reload the guide

            

            logger.debug('XMLTV, trying to read raw file (%s)', XMLTV_FILE)
            try:
                if popup:
                    popup_dialog.set_indeterminate(False)
                    popup_dialog.show()
                    inprogress = kaa.ThreadCallable(load_guide, XMLTV_FILE, popup_dialog)()
                    inprogress.wait()
                    cached_guide = inprogress.result
                    popup_dialog.set_indeterminate(True)
                else:
                    cached_guide = load_guide(XMLTV_FILE)
            except:
                # Don't violently crash on a incomplete or empty TV.xml please.
                cached_guide = None
                print
                print String(_("Couldn't load the TV Guide, got an exception!"))
                print
                traceback.print_exc()
            else:
                # Replace config.XMLTV_FILE before we save the pickle in order
                # to avoid timestamp confusion.
                if XMLTV_FILE != config.XMLTV_FILE:
                    logger.info('copying %r -> %r', XMLTV_FILE, config.XMLTV_FILE)
                    shutil.copyfile(XMLTV_FILE, config.XMLTV_FILE)
                    os.unlink(XMLTV_FILE)
                    cached_guide.timestamp = os.path.getmtime(config.XMLTV_FILE)

                # Dump a pickled version for later reads
                if popup:
                    kaa.ThreadCallable(util.save_pickle, cached_guide, pname)().wait()
                else:
                    util.save_pickle(cached_guide, pname)

    if not cached_guide:
        # An error occurred, return an empty guide
        cached_guide = TvGuide()

    if popup:
        popup_dialog.hide()

    return cached_guide
Example #55
def saveMameRomList(mameRomList):

    if not mameRomList or mameRomList == None:
        mameRomList = mame_types.MameRomList()

    util.save_pickle(mameRomList, config.GAMES_MAME_CACHE)