Example #1
0
def run(lam, gam, tau, ITERS, attack_years=(), savetail='', pmitail=''):
    """Train one U/V embedding pair per time slice with alternating updates.

    Every slice in the module-level ``T`` starts from a copy of the static
    embedding stored under key ``'emb'`` in ``data/emb_static<savetail>``.
    For each outer iteration the slices are visited in turn, the slice's PMI
    matrix is loaded, and ``util.update`` is applied first to V and then to U
    for every mini batch.  After each iteration both lists are pickled to
    ``<savefile>ngU_iter<k>.p`` / ``<savefile>ngV_iter<k>.p``; iterations
    whose two checkpoint files already exist are loaded instead of recomputed.

    Relies on module-level names: ``nw``, ``T``, ``savehead``, ``sio``,
    ``util``, ``print_params``, ``copy``, ``pickle`` and ``np``.

    Args:
        lam, gam, tau: numeric hyper-parameters forwarded unchanged to
            ``util.update`` (their exact roles are defined there).
        ITERS: number of outer iterations.
        attack_years: entries of ``T`` whose PMI matrix is read from the
            ``pmitail``-suffixed file instead of the regular one.
        savetail: optional suffix (joined with ``_``) for the checkpoint
            prefix and the static-embedding file name.
        pmitail: optional suffix (joined with ``_``) for the PMI files of
            the attack years.

    Returns:
        None.  Returns early, after printing a message, when the static
        embedding file cannot be opened.
    """
    import time

    r = 50  # rank
    b = nw  # batch size
    emph = 1  # emphasize the nonzero

    if savetail:
        savetail = '_' + savetail
    if pmitail:
        pmitail = '_' + pmitail
    savefile = (savehead + 'L' + str(lam) + 'T' + str(tau) + 'G' + str(gam)
                + 'A' + str(emph) + savetail)

    embfilename = 'data/emb_static%s' % savetail
    print('%s %s' % (savefile, embfilename))
    try:
        e = sio.loadmat(embfilename)['emb']
    except IOError:
        print('file not available yet %s' % embfilename)
        return

    print('starting training with following parameters')
    print_params(r, lam, tau, gam, emph, ITERS)
    print('there are a total of {} words, and {} time points'.format(nw, T))

    print('X*X*X*X*X*X*X*X*X')
    print('initializing')

    # Independent copies so that updating one slice never touches another.
    Ulist = [copy.deepcopy(e) for _ in T]
    Vlist = [copy.deepcopy(e) for _ in T]
    del e

    print('getting batch indices')
    if b < nw:
        b_ind = util.getbatches(nw, b)
    else:
        # A real list (not a lazy range) so it can be used for fancy indexing.
        b_ind = [list(range(nw))]

    last = len(T) - 1
    start_time = time.time()
    # sequential updates
    for iteration in range(ITERS):
        print_params(r, lam, tau, gam, emph, ITERS)
        u_path = "%sngU_iter%d.p" % (savefile, iteration)
        v_path = "%sngV_iter%d.p" % (savefile, iteration)

        # Resume support: reuse an iteration only when BOTH checkpoints can
        # be read, so U and V never end up from different iterations.
        try:
            with open(u_path, "rb") as fh:
                saved_U = pickle.load(fh)
            with open(v_path, "rb") as fh:
                saved_V = pickle.load(fh)
        except IOError:
            pass
        else:
            Ulist, Vlist = saved_U, saved_V
            print('iteration %d loaded succesfully' % iteration)
            continue

        # shuffle times
        # NOTE(review): only len(times) is used below, so the slices are
        # always visited in the order of T and the permutation has no effect
        # on the visiting order.  Kept as-is to preserve training behaviour.
        if iteration == 0:
            times = T
        else:
            times = np.random.permutation(T)

        for t in range(len(times)):  # select a time
            print('iteration %d, time %d' % (iteration, t))
            if T[t] in attack_years:
                f = 'data/wordPMI_%d%s.mat' % (T[t], pmitail)
            else:
                f = 'data/wordPMI_' + str(T[t]) + '.mat'

            pmi = sio.loadmat(f)['pmi']

            is_first = t == 0
            is_last = t == last
            # Boundary flag for util.update.  Previously the value set for
            # t == 0 was unconditionally overwritten by the "last slice"
            # branch, so it was only ever True for the final slice.
            # Presumably it marks a slice with a single temporal neighbour;
            # confirm against util.update.
            iflag = is_first or is_last

            for j, ind in enumerate(b_ind):  # select a mini batch
                print('%d out of %d' % (j, len(b_ind)))
                pmi_seg = pmi[:, ind].todense()

                # Previous-slice neighbours (zeros when there is none).
                if is_first:
                    vp = np.zeros((len(ind), r))
                    up = np.zeros((len(ind), r))
                else:
                    vp = Vlist[t - 1][ind, :]
                    up = Ulist[t - 1][ind, :]

                # Next-slice neighbours (zeros when there is none).
                if is_last:
                    vn = np.zeros((len(ind), r))
                    un = np.zeros((len(ind), r))
                else:
                    vn = Vlist[t + 1][ind, :]
                    un = Ulist[t + 1][ind, :]

                # V is refreshed first; the U update then sees the new V.
                Vlist[t][ind, :] = util.update(Ulist[t], emph * pmi_seg, vp,
                                               vn, lam, tau, gam, ind, iflag)
                Ulist[t][ind, :] = util.update(Vlist[t], emph * pmi_seg, up,
                                               un, lam, tau, gam, ind, iflag)

        # save
        print('time elapsed =  %s' % (time.time() - start_time))

        with open(u_path, "wb") as fh:
            pickle.dump(Ulist, fh, pickle.HIGHEST_PROTOCOL)
        with open(v_path, "wb") as fh:
            pickle.dump(Vlist, fh, pickle.HIGHEST_PROTOCOL)
def run(lam, gam, tau, ITERS, attack_years=(), savetail="", pmitail=""):
    """Train one U/V embedding pair per time slice with alternating updates.

    Every slice in the module-level ``T`` starts from a copy of the static
    embedding stored under key ``"emb"`` in ``data/emb_static<savetail>.mat``.
    For each outer iteration the slices are visited in turn, the slice's
    matrix (key ``"pmi"``) is loaded, and ``util.update`` is applied first to
    V and then to U for every mini batch.  After each iteration both lists
    are pickled to ``<savefile>ngU_iter<k>.p`` / ``<savefile>ngV_iter<k>.p``;
    iterations whose two checkpoint files already exist are loaded instead of
    recomputed.

    Relies on module-level names: ``nw``, ``T``, ``savehead``, ``sio``,
    ``util``, ``print_params``, ``copy``, ``pickle`` and ``np``.

    Args:
        lam, gam, tau: numeric hyper-parameters forwarded unchanged to
            ``util.update`` (their exact roles are defined there).
        ITERS: number of outer iterations.
        attack_years: entries of ``T`` whose matrix is read from the
            ``pmitail``-suffixed file instead of the regular one.
        savetail: optional suffix (joined with ``_``) for the checkpoint
            prefix and the static-embedding file name.
        pmitail: optional suffix (joined with ``_``) for the per-slice files
            of the attack years.

    Returns:
        None.  Returns early, after printing a message, when the static
        embedding file cannot be opened.
    """
    import time

    r = 50  # rank
    b = nw  # batch size
    emph = 1  # emphasize the nonzero

    if savetail:
        savetail = "_" + savetail
    if pmitail:
        pmitail = "_" + pmitail
    savefile = (savehead + "L" + str(lam) + "T" + str(tau) + "G" + str(gam) +
                "A" + str(emph) + savetail)

    embfilename = "data/emb_static%s.mat" % savetail
    print(savefile, embfilename)
    try:
        e = sio.loadmat(embfilename)["emb"]
    except IOError:
        print("file not available yet", embfilename)
        return

    print("starting training with following parameters")
    print_params(r, lam, tau, gam, emph, ITERS)
    print("there are a total of {} words, and {} time points".format(nw, T))

    print("X*X*X*X*X*X*X*X*X")
    print("initializing")

    # Independent copies so that updating one slice never touches another.
    Ulist = [copy.deepcopy(e) for _ in T]
    Vlist = [copy.deepcopy(e) for _ in T]
    del e

    print("getting batch indices")
    if b < nw:
        b_ind = util.getbatches(nw, b)
    else:
        # A real list (not a lazy range) so it can be used for fancy indexing.
        b_ind = [list(range(nw))]

    last = len(T) - 1
    start_time = time.time()
    # sequential updates
    for iteration in range(ITERS):
        print_params(r, lam, tau, gam, emph, ITERS)
        u_path = "%sngU_iter%d.p" % (savefile, iteration)
        v_path = "%sngV_iter%d.p" % (savefile, iteration)

        # Resume support: reuse an iteration only when BOTH checkpoints can
        # be read, so U and V never end up from different iterations.
        try:
            with open(u_path, "rb") as fh:
                saved_U = pickle.load(fh)
            with open(v_path, "rb") as fh:
                saved_V = pickle.load(fh)
        except IOError:
            pass
        else:
            Ulist, Vlist = saved_U, saved_V
            print("iteration %d loaded succesfully" % iteration)
            continue

        # shuffle times
        # NOTE(review): only len(times) is used below, so the slices are
        # always visited in the order of T and the permutation has no effect
        # on the visiting order.  Kept as-is to preserve training behaviour.
        if iteration == 0:
            times = T
        else:
            times = np.random.permutation(T)

        for t in range(len(times)):  # select a time
            print("iteration %d, time %d" % (iteration, t))
            # NOTE(review): the matrix is read from the emb_* files under key
            # "pmi"; an earlier revision used data/wordPMI_* paths here.
            # Confirm the emb_* naming is intended.
            if T[t] in attack_years:
                pmi_path = "data/emb_{}{}.mat".format(T[t], pmitail)
            else:
                pmi_path = "data/emb_{}.mat".format(T[t])

            pmi = sio.loadmat(pmi_path)["pmi"]

            is_first = t == 0
            is_last = t == last
            # Boundary flag for util.update.  Previously the value set for
            # t == 0 was unconditionally overwritten by the "last slice"
            # branch, so it was only ever True for the final slice.
            # Presumably it marks a slice with a single temporal neighbour;
            # confirm against util.update.
            iflag = is_first or is_last

            for j, ind in enumerate(b_ind):  # select a mini batch
                print("%d out of %d" % (j, len(b_ind)))
                pmi_seg = pmi[:, ind].todense()

                # Previous-slice neighbours (zeros when there is none).
                if is_first:
                    vp = np.zeros((len(ind), r))
                    up = np.zeros((len(ind), r))
                else:
                    vp = Vlist[t - 1][ind, :]
                    up = Ulist[t - 1][ind, :]

                # Next-slice neighbours (zeros when there is none).
                if is_last:
                    vn = np.zeros((len(ind), r))
                    un = np.zeros((len(ind), r))
                else:
                    vn = Vlist[t + 1][ind, :]
                    un = Ulist[t + 1][ind, :]

                # V is refreshed first; the U update then sees the new V.
                Vlist[t][ind, :] = util.update(Ulist[t], emph * pmi_seg, vp,
                                               vn, lam, tau, gam, ind, iflag)
                Ulist[t][ind, :] = util.update(Vlist[t], emph * pmi_seg, up,
                                               un, lam, tau, gam, ind, iflag)

        # save
        print("time elapsed = ", time.time() - start_time)

        with open(u_path, "wb") as fh:
            pickle.dump(Ulist, fh, pickle.HIGHEST_PROTOCOL)
        with open(v_path, "wb") as fh:
            pickle.dump(Vlist, fh, pickle.HIGHEST_PROTOCOL)
Example #3
0
    print('starting training with following parameters')
    print_params(r, lam, tau, gam, emph, ITERS)
    print('there are a total of {} words, and {} time points'.format(nw, T))

    print('X*X*X*X*X*X*X*X*X')
    print('initializing')

    #Ulist,Vlist = util.initvars(nw,T,r, trainhead)
    Ulist, Vlist = util.import_static_init(T)
    print(Ulist)
    print(Vlist)
    #asdf
    print('getting batch indices')
    if b < nw:
        b_ind = util.getbatches(nw, b)
    else:
        b_ind = [list(range(nw))]

    import time
    start_time = time.time()
    # sequential updates
    for iteration in range(ITERS):
        print_params(r, lam, tau, gam, emph, ITERS)
        try:
            Ulist = pickle.load(
                open("%sngU_iter%d.p" % (savefile, iteration), "rb"))
            Vlist = pickle.load(
                open("%sngV_iter%d.p" % (savefile, iteration), "rb"))
            print('iteration %d loaded succesfully' % iteration)
            continue
Example #4
0
def run_dw2v(exper_dir, iters, lam, tau, gam, emph, r):
    """Train one U/V embedding pair per time point for an experiment folder.

    Inputs are read from ``<exper_dir>/embs``: the vocabulary file
    ``wordIDHash.csv``, the static initialisation ``emb_static.mat`` and one
    ``wordPairPMI_<t>.csv`` per time point returned by ``get_points``.
    For each outer iteration the time points are visited in turn and
    ``util.update`` is applied first to V and then to U for every mini batch.
    After each iteration both lists are pickled under
    ``<exper_dir>/results`` as ``L..T..G..A..ngU_iter<k>.p`` /
    ``...ngV_iter<k>.p``; iterations whose two checkpoint files already exist
    are loaded instead of recomputed.

    Relies on module-level names: ``os``, ``pickle``, ``np``,
    ``print_params``, ``get_vocabulary_size`` and ``get_points``.

    Args:
        exper_dir: experiment directory holding ``embs/``; ``results/`` is
            created inside it when missing.
        iters: number of outer iterations.
        lam, tau, gam: numeric hyper-parameters forwarded unchanged to
            ``util.update`` (their exact roles are defined there).
        emph: scalar multiplied into each PMI segment before the update.
        r: embedding rank; used as the column count of the zero neighbour
            blocks at the first and last time point.

    Returns:
        None.
    """
    import time

    import DynamicWord2Vec.train_model.util_timeCD as util

    data_dir = os.path.join(exper_dir, 'embs')
    trainhead = os.path.join(data_dir, 'wordPairPMI_')
    voc_path = os.path.join(data_dir, 'wordIDHash.csv')
    embmat = os.path.join(data_dir, 'emb_static.mat')
    b = nw = get_vocabulary_size(voc_path)
    ITERS = iters
    T = get_points(data_dir)
    savehead = os.path.join(exper_dir, 'results')
    # Make sure the checkpoint directory exists before any training happens;
    # otherwise the first save would fail only after a full pass over T.
    os.makedirs(savehead, exist_ok=True)
    savefile = os.path.join(savehead,
                            'L{}T{}G{}A{}'.format(lam, tau, gam, emph))

    print(f"b = {b}, nw = {nw}, T = {T}")
    print(iters, lam, tau, gam, emph, r)

    print('starting training with following parameters')
    print_params(r, lam, tau, gam, emph, ITERS)
    print('there are a total of {} words, and {} time points'.format(nw, T))

    print('X*X*X*X*X*X*X*X*X')
    print('initializing')

    Ulist, Vlist = util.import_static_init(T, embmat)
    print(Ulist)
    print(Vlist)

    print('getting batch indices')
    if b < nw:
        b_ind = util.getbatches(nw, b)
    else:
        b_ind = [list(range(nw))]

    last = len(T) - 1
    start_time = time.time()
    # sequential updates
    for iteration in range(ITERS):
        print_params(r, lam, tau, gam, emph, ITERS)
        u_path = "%sngU_iter%d.p" % (savefile, iteration)
        v_path = "%sngV_iter%d.p" % (savefile, iteration)

        # Resume support: reuse an iteration only when BOTH checkpoints can
        # be read, so U and V never end up from different iterations.
        try:
            with open(u_path, "rb") as fh:
                saved_U = pickle.load(fh)
            with open(v_path, "rb") as fh:
                saved_V = pickle.load(fh)
        except IOError:
            pass
        else:
            Ulist, Vlist = saved_U, saved_V
            print('iteration %d loaded succesfully' % iteration)
            continue

        # shuffle times
        # NOTE(review): only len(times) is used below, so the time points are
        # always visited in the order of T and the permutation has no effect
        # on the visiting order.  Kept as-is to preserve training behaviour.
        if iteration == 0:
            times = T
        else:
            times = np.random.permutation(T)

        for t in range(len(times)):  # select a time
            print('iteration %d, time %d' % (iteration, t))
            f = trainhead + str(T[t]) + '.csv'
            print(f)

            pmi = util.getmat(f, nw, False)

            is_first = t == 0
            is_last = t == last
            # Boundary flag for util.update.  Previously the value set for
            # t == 0 was unconditionally overwritten by the "last time point"
            # branch, so it was only ever True for the final one.
            # Presumably it marks a time point with a single temporal
            # neighbour; confirm against util.update.
            iflag = is_first or is_last

            for j, ind in enumerate(b_ind):  # select a mini batch
                print('%d out of %d' % (j, len(b_ind)))
                pmi_seg = pmi[:, ind].todense()

                # Previous-time neighbours (zeros when there is none).
                if is_first:
                    vp = np.zeros((len(ind), r))
                    up = np.zeros((len(ind), r))
                else:
                    vp = Vlist[t - 1][ind, :]
                    up = Ulist[t - 1][ind, :]

                # Next-time neighbours (zeros when there is none).
                if is_last:
                    vn = np.zeros((len(ind), r))
                    un = np.zeros((len(ind), r))
                else:
                    vn = Vlist[t + 1][ind, :]
                    un = Ulist[t + 1][ind, :]

                # V is refreshed first; the U update then sees the new V.
                Vlist[t][ind, :] = util.update(Ulist[t], emph * pmi_seg, vp,
                                               vn, lam, tau, gam, ind, iflag)
                Ulist[t][ind, :] = util.update(Vlist[t], emph * pmi_seg, up,
                                               un, lam, tau, gam, ind, iflag)

        # save
        print('time elapsed = ', time.time() - start_time)

        with open(u_path, "wb") as fh:
            pickle.dump(Ulist, fh, pickle.HIGHEST_PROTOCOL)
        with open(v_path, "wb") as fh:
            pickle.dump(Vlist, fh, pickle.HIGHEST_PROTOCOL)