# Example #1 (0)
def setAliasesIfNeeded(idstring, data=None, reset=False):
    """Fetch and cache freebase aliases for *idstring*.

    *idstring* has the form ``'<type>:<key>'`` (the key may itself
    contain colons).  Aliases are cached per type in a pickle file under
    ``econtains_data_root``; the cache is consulted unless *reset* is
    true, in which case the aliases are re-fetched via ``fbu.fetch_type``
    and the cache file is rewritten.

    Returns a dict with keys ``'count'`` (number of aliases),
    ``'written'`` (whether the cache file was updated) and ``'aliases'``.
    """
    # Fix for the shared-mutable-default pitfall: the original default
    # `data={}` was mutated via data.update() and leaked across calls.
    data = {} if data is None else data

    # Parse "type:key"; everything after the first ':' is the key.
    id_type, _, key = idstring.partition(':')
    assert id_type in ['food', 'band', 'bandcollection', 'person']

    # Cache file in which to store information for this type of idstring.
    path = os.path.join(econtains_data_root,
                        '{0}.pickle'.format(id_type))
    if not os.path.isdir(os.path.dirname(path)):
        os.makedirs(os.path.dirname(path))
    if os.path.isfile(path):
        # Binary mode is required by pickle; the context manager also
        # guarantees the handle is closed (the original leaked it).
        with open(path, 'rb') as fh:
            content = pickle.load(fh)
    else:
        content = {}

    # Use freebase to gather info for idstring only if needed.
    if (not reset) and key in content:
        return {'count': len(content[key]),
                'written': False,
                'aliases': content[key]}

    # Cache miss (or forced reset): fetch, store, and persist.
    data.update(dict(name=key))
    aliases = fbu.fetch_type(id_type, **data)
    content[key] = aliases
    with open(path, 'wb') as fh:
        pickle.dump(content, fh)
    return {'count': len(aliases),
            'written': True,
            'aliases': content[key]}
# Example #2 (0)
def run(alltweets, freebase_type = 'band', **kwargs):
    '''
    yield a list of statistically significant tweets.
    cross check it against a set of aliases generated by freebase.
    
    called with a freebase type, it will call the fetch_type routine
    provided by freebase.py in order to grab a list of aliases to match
    against discovered terms.
    '''

    # Token frequencies over the whole tweet corpus; `count` and
    # `tokenize` are defined elsewhere in this project.
    counts = count(tokenize(alltweets))
    # Candidate terms: only tokens of at least 5 characters.
    long_keys = set([k for k in counts.keys() if len(k)>=5])
    # Aliases for the requested freebase type; `mem.sr` presumably
    # normalizes/serializes kwargs for the fetch -- TODO confirm
    # against the `mem` module.
    freebase_aliases = fbu.fetch_type(freebase_type, **mem.sr(kwargs))
    # Aliases that also occur among the long tokens (lower-cased match;
    # NOTE(review): long_keys is not lower-cased, so mixed-case tokens
    # may be missed -- verify tokenize()'s casing).
    matched = [ a.lower() for a  in freebase_aliases if (a.lower() in long_keys)]
    
    # NOTE(review): unconditional raise -- looks like an unfinished
    # stub; `matched` is computed but never returned or yielded despite
    # the docstring's promise. Confirm intended behavior.
    raise Exception()