# Example #1
# 0
import sys

from src.misc import mail

if __name__ == "__main__":
    # Script entry point. Three stages run one after another:
    #   1. send a success/error mail built from two command-line arguments;
    #   2. map `extended_distribution` over every pivot word in a worker pool;
    #   3. load the ball-tree embedding indexes and call `savemixcdf`.
    #
    # NOTE(review): `json`, `Pool`, `thread_no`, `logger`, `identity`,
    # `simfunction`, `extended_distribution`, `dim`,
    # `BallTreeANNEmbeddingIndex`, `Preprocessing`, `ExpandTerm` and
    # `savemixcdf` are not defined in the visible part of this file; they
    # must be imported/defined elsewhere for this block to run.
    #
    # The syntax below (`except ... as`, single-argument `print(...)`,
    # `with open`) behaves the same on Python 2.6+ and parses on Python 3.

    # ---- Stage 1: mail notification driven by the command line ----------
    argv = sys.argv[1:]
    if len(argv) < 2:
        # Fail with a readable usage line instead of a bare IndexError.
        sys.exit('usage: %s <suc|err> <message>' % sys.argv[0])

    msgtype = argv[0]
    msg = argv[1]

    if msgtype == 'suc':
        mail.sendemail_success(msg)
    elif msgtype == 'err':
        mail.sendemail_error(msg)
    # Any other message type falls through without sending anything.

    # ---- Stage 2: extended distribution over all pivot words ------------
    # get pivotwords
    # Close the vector file deterministically instead of leaking the handle.
    with open('vectors/vector200-1.txt.norm') as vector_file:
        pivotwordvectors = json.load(vector_file)
    # Materialise the keys as a list: the workers are handed integer
    # positions, so presumably they index into `pivotwords` - TODO confirm.
    pivotwords = list(pivotwordvectors)
    #pivotwords=['book']

    p = Pool(thread_no)
    # NOTE(review): `result` is never read in the visible code; kept in case
    # code outside this view relies on the module-level name.
    result = False
    try:
        logger.info('starting the pool map')
        p.map(extended_distribution, range(len(pivotwords)))
        #extended_distribution(0)
        p.close()
        logger.info('pool map complete')
        msgText = identity + ' : extended_distribution(' + simfunction + ') finished!'
        mail.sendemail_success(msgText)

    except KeyboardInterrupt:
        logger.info('got ^C while pool mapping, terminating the pool')
        p.terminate()
        logger.info('pool is terminated')
    except Exception as e:
        # The exception is logged and swallowed; the script carries on
        # with stage 3 afterwards.
        logger.info('got exception: %r, terminating the pool' % (e,))
        p.terminate()
        logger.info('pool is terminated')
    finally:
        # Runs on every path, after either close() or terminate().
        logger.info('joining pool processes')
        p.join()
        logger.info('join complete')

    # ---- Stage 3: mixed CDFs from the ball-tree embedding indexes -------
    index_dir_path = '/data/text/retrieval/data/TREC/TREC-1/Adhoc/indexes/terrier-core-4.1'
    # Alternative pivot-word sets kept for reference:
    #pivotwords=['book', 'trip', 'excursion', 'dwarfish']
    #pivotwords=['asylum','bishop','bread','book','crane','coast','computer','company','cup','drug','football','forest','journey','king','law','magician','movie','mother','physics','smile','telephone','train','vodka','weapon','wood','alleviated','assigned','assessment','carbonic','circumcising','circumvents','comfortable','dispossess','disinheritance','dwarfish','entrapping','hyperlink','imperfection','involvement','incommensurate','incubate','marginalize','provincialism','radiators','soulfully','subdividing','squishing','tenderize','urbanize','vulgarism']

    # This hard-coded list replaces the pivot words loaded in stage 2.
    pivotwords = ['revolutionary','casual','theoretical','novel','respected','bright','feasible','responsive','bipolar','forceful','successfully','consequently','daily','partially','immediately','finally','asleep','unrealistically','skillfully','erratically','costume','table','piano','mail','mosque','greenhouse','shower','supercharger','apparel','creamery','stature','aggression','humanity','deficit','function','credibility','concentration','responsiveness','virility','inconvenience','shame','guilt','pride','dislike','hope','love','despair','eagerness','aggravation','insecurity','detective','psychologist','freud','ford','einstein','john','armstrong','philemon','microbiologist','lancelot','recognize','compare','suppress','estimate','receive','observe','find','demoralize','standardize','decipher','play','feed','drink','consume','want','give','carry','devour','inhale','nibble','cook','paint','dance','create','prove','draw','shape','delineate','sew','republish','interest','upset','hurt','appreciate','fail','suffer','miss','antagonize','apprehend','inflame']

    # One index is loaded per model number; only 1, 3 and 4 are used.
    anntreeballlist = []
    for i in [1, 3, 4]:  #range(1,6):
        anntreeball = BallTreeANNEmbeddingIndex()
        anntreeball_path = '/data/nrekabsaz/data/embeddingindex/wikinostem/w2v' + dim + '-' + str(i) + '-norm/model.idx'
        print(anntreeball_path)
        anntreeball.load_index(anntreeball_path)
        anntreeballlist.append(anntreeball)

    preprocessing = Preprocessing(index_dir_path)
    expandterm = ExpandTerm(preprocessing, logger, '')

    print('loading anntreeballs done!')

    savemixcdf(pivotwords, anntreeballlist, expandterm)

    #expanding to mixpdflist

    mail.sendemail_success('savemixcdf done!')