Esempio n. 1
0
def vec_for_stats(adv, ass):
    """Aggregate per-agent ad vectors into two group-level ad vectors.

    An agent with index ``i`` is assigned to the first group when
    ``str(i)`` occurs in the first half of ``ass``; every other agent goes
    to the second group.  (The original comment describes the two groups as
    the control group and the experiment group.)

    Args:
        adv: Indexable sequence of per-agent ad vectors.
        ass: Assignment sequence; only its first half is consulted.

    Returns:
        A tuple ``(advm, advf)`` of two ``adVector.AdVector`` objects, each
        built by calling ``add_vec`` with the vectors of its group.
    """
    # Floor division keeps the slice bound an int on Python 2 and Python 3;
    # the slice itself never changes, so it is built once, not per agent.
    first_half = ass[0:len(ass) // 2]

    advm = adVector.AdVector()
    advf = adVector.AdVector()
    for i, agent_vec in enumerate(adv):
        if str(i) in first_half:
            advm.add_vec(agent_vec)
        else:
            advf.add_vec(agent_vec)
    return (advm, advf)
Esempio n. 2
0
def read_log(log_file):
    """Parse an experiment log into one record per block.

    Every line is decoded with ``interpret_log_line`` and dispatched on its
    line type:

    * ``meta`` lines carry the agent count (``agents``), the treatment names
      (``treatnames``, separated by ``@|``), the start of a block
      (``block_id start``), the block's assignment (``assignment``, integers
      separated by ``@|``) and the end of a block (``block_id end``).
    * ``measurement`` lines add an ad, an interest string or a news item to
      the vector of the unit named by the line.
    * ``treatment`` and ``error`` lines are ignored.

    The code assumes that ``agents`` and ``treatnames`` appear before the
    first block and that measurements only occur inside a block; a log that
    violates this raises ``NameError``/``UnboundLocalError``.

    Args:
        log_file: Path of the log file to read.

    Returns:
        A tuple ``(par_adv, treatnames)``.  ``par_adv`` is a list with one
        dict per completed block, holding the keys ``'advector'``,
        ``'newsvector'``, ``'assignment'`` and ``'intvector'``.
        ``treatnames`` is the list of treatment names, or an empty list when
        the log never declared any (e.g. an empty file).
    """
    par_adv = []
    treatnames = []  # stays empty if the log has no 'treatnames' line
    sys.stdout.write("Reading log")
    # The context manager closes the file even if a line fails to parse.
    with open(log_file, "r") as fo:
        for line in fo:
            (tim, linetype, linename, value, unit_id,
             treatment_id) = interpret_log_line(line)
            if linetype == 'meta':
                if linename == 'agents':
                    num_agents = int(value)
                elif linename == 'treatnames':
                    treatnames = re.split(r"\@\|", value)
                elif linename == 'block_id start':
                    # One progress dot per block.
                    sys.stdout.write(".")
                    sys.stdout.flush()
                    # Not used further; kept so a malformed id still fails
                    # loudly here, as it did before.
                    block_id = int(value)
                    # Fresh, empty vectors for every agent of the new block.
                    adv = []
                    ints = []
                    newsv = []
                    for _ in range(num_agents):
                        adv.append(adVector.AdVector())
                        ints.append(interest.Interests())
                        newsv.append(news.NewsVector())
                elif linename == 'assignment':
                    assignment = [int(x) for x in re.split(r"\@\|", value)]
                elif linename == 'block_id end':
                    apply_labels_to_vecs(adv, ints, newsv, assignment,
                                         num_agents, len(treatnames))
                    par_adv.append({
                        'advector': adv,
                        'newsvector': newsv,
                        'assignment': assignment,
                        'intvector': ints
                    })
            elif linetype == 'measurement':
                if linename == 'ad':
                    adv[int(unit_id)].add(ad.Ad(value, treatment_id))
                elif linename == 'interest':
                    ints[int(unit_id)].set_from_string(value)
                elif linename == 'news':
                    newsv[int(unit_id)].add(news.News(value, treatment_id))
            # 'treatment' and 'error' lines carry nothing that is collected.
    sys.stdout.write(".Reading complete\n")
    # Single pre-formatted argument: prints the same text as the former
    # Python 2 print statement and is also valid on Python 3.
    print("Treatments:  %s" % (treatnames,))
    return par_adv, treatnames
Esempio n. 3
0
def temp_ad_vectors(list):
    """Build one ``gen_temp_ad_vec`` vector per input ad vector.

    All inputs are first merged with ``union`` into a single ad vector;
    that merged vector then produces, for each input, the result of
    ``gen_temp_ad_vec``.

    Args:
        list: Re-iterable collection of ad vectors, each exposing ``label``.

    Returns:
        A tuple ``(tav_list, labels, ad_union)``: the generated vectors, the
        matching labels in input order, and the merged ad vector.
    """
    merged = adVector.AdVector()
    for member in list:
        merged = merged.union(member)

    # For each entry the vector is generated first and the label read second,
    # one entry at a time and in input order.
    rows = [(merged.gen_temp_ad_vec(member), member.label) for member in list]
    vectors = [row[0] for row in rows]
    names = [row[1] for row in rows]
    return vectors, names, merged
Esempio n. 4
0
def ad_vectors(list):
    """Return a frequency vector of ads for each ad vector in ``list``.

    The inputs are merged with ``union`` into one ad vector, which is then
    asked for ``gen_ad_vec`` of every input.

    Args:
        list: Re-iterable collection of ad vectors, each exposing ``label``.

    Returns:
        A tuple ``(av_list, labels, ad_union)``: the generated vectors, the
        labels in the same order, and the merged ad vector (the entire ad
        serves as the feature).
    """
    combined = adVector.AdVector()
    for entry in list:
        combined = combined.union(entry)

    # Vector first, label second, entry by entry in input order.
    pairs = [(combined.gen_ad_vec(entry), entry.label) for entry in list]
    vectors = [vec for vec, _ in pairs]
    tags = [tag for _, tag in pairs]
    return vectors, tags, combined
Esempio n. 5
0
def word_vectors(list):
    """Return a frequency vector of words for each ad vector in ``list``.

    The inputs are merged with ``union``; the merged vector's words are
    passed through ``common.stem_low_wvec``, English stopwords are dropped,
    and the remainder is reduced with ``common.unique_words`` and
    ``common.strip_vec`` to form the vocabulary.  Each input then produces
    its ``gen_word_vec`` over that vocabulary.

    Args:
        list: Re-iterable collection of ad vectors, each exposing ``label``.

    Returns:
        A tuple ``(wv_list, labels, word_v)``: the per-input word vectors,
        the labels in the same order, and the vocabulary used as features.
    """
    ad_union = adVector.AdVector()
    for ads in list:
        ad_union = ad_union.union(ads)

    stemmed_words = common.stem_low_wvec(ad_union.advec_to_words())

    # The stopword list does not change while filtering: fetch it once and
    # use a set, instead of re-reading and scanning it for every word.
    english_stopwords = frozenset(stopwords.words('english'))
    filtered_words = [w for w in stemmed_words if w not in english_stopwords]

    word_v = common.unique_words(filtered_words)
    word_v = common.strip_vec(word_v)

    wv_list = []
    labels = []
    for ads in list:
        wv_list.append(ads.gen_word_vec(word_v))
        labels.append(ads.label)
    return wv_list, labels, word_v  # word_v is the feature vocabulary
Esempio n. 6
0
def ad_vectors(list, filtered_by=None):
    """Return a frequency vector of ads for each ad vector in ``list``.

    When ``filtered_by`` is given, every input is first replaced by the
    result of its ``filter_by_keywords(filtered_by)``.  The (possibly
    filtered) vectors are merged with ``union``, and the merged vector is
    asked for ``gen_ad_vec`` of each of them.

    Args:
        list: Re-iterable collection of ad vectors.
        filtered_by: Optional argument forwarded to ``filter_by_keywords``;
            ``None`` (the default) disables filtering.

    Returns:
        A tuple ``(av_list, labels, ad_union)``: the generated vectors, the
        labels of the (filtered) inputs in the same order, and the merged ad
        vector (the entire ad serves as the feature).
    """
    ad_union = adVector.AdVector()
    # Identity test: None is a singleton and must not go through __eq__.
    if filtered_by is None:
        new_list = list
    else:
        new_list = [ads.filter_by_keywords(filtered_by) for ads in list]

    for ads in new_list:
        ad_union = ad_union.union(ads)

    av_list = []
    labels = []
    for ads in new_list:
        av_list.append(ad_union.gen_ad_vec(ads))
        labels.append(ads.label)
    return av_list, labels, ad_union
Esempio n. 7
0
def read_log(log_file):  # check
    """Parse a ``||``-separated experiment log into one record per block.

    The file is read twice.  The first pass looks only at the header:

    * old format - the first field of the first line is ``g``: two
      treatments named ``'0'`` and ``'1'`` are assumed and the sample count
      is the number of remaining fields on that line;
    * new format - the first line carries the sample count in field 1 and
      the treatment count in field 2, and the second line lists the
      treatment names from field 1 onwards.

    The second pass walks every line.  A line whose first field is the
    assignment marker (``g`` for old logs, ``assign`` for new ones) starts a
    new block; from the second marker on, the block collected so far is
    stored first.  All other lines update the current block.  Lines that
    match no known record type are tried as old-style ad records and are
    skipped silently when that fails.

    Args:
        log_file: Path of the log file to read.

    Returns:
        A tuple ``(par_adv, treatnames)``.  ``par_adv`` is a list of dicts,
        one per block, with the keys ``'adv'``, ``'newsv'``, ``'ass'``,
        ``'xf'``, ``'interests'``, ``'break'``, ``'loadtimes'``,
        ``'reloads'`` and ``'errors'``.  ``treatnames`` is the list of
        treatment names.

    Raises:
        AssertionError: If the treatment count does not match the number of
            treatment names, or an assignment does not have one entry per
            sample.
    """
    field_sep = r"\|\|"
    stamp_format = "%Y-%m-%d %H:%M:%S.%f"

    # ---- Pass 1: header only -------------------------------------------
    treatnames = []
    with open(log_file, "r") as fo:
        chunks = re.split(field_sep, fo.readline())
        if chunks[0] == 'g':
            old = True
            gmarker = 'g'
            treatments = 2
            treatnames = ['0', '1']
            samples = len(chunks) - 1
        else:
            old = False
            gmarker = 'assign'
            treatments = int(chunks[2])
            samples = int(chunks[1])
            chunks = re.split(field_sep, fo.readline())
            treatnames = [name.strip() for name in chunks[1:]]
    assert treatments == len(treatnames)
    for i in range(treatments):
        # Single pre-formatted argument: same text as the former Python 2
        # print statement, and also valid on Python 3.
        print("Treatment  %s  =  %s" % (i, treatnames[i]))

    # ---- Pass 2: collect blocks ----------------------------------------
    par_adv = []
    block = _new_log_block(samples)
    seen_assignment = False

    sys.stdout.write("Scanning ads")
    with open(log_file, "r") as fo:
        for line in fo:
            chunks = re.split(field_sep, line)
            chunks[-1] = chunks[-1].rstrip()
            head = chunks[0]
            # Lines without a separator (e.g. blank lines) have no second
            # field; they must fall through instead of raising IndexError.
            tag = chunks[1] if len(chunks) > 1 else None

            if head == gmarker:
                if seen_assignment:
                    # A further assignment closes the block collected so far.
                    par_adv.append(block)
                    sys.stdout.write(".")
                    sys.stdout.flush()
                    block = _new_log_block(samples)
                seen_assignment = True
                block['ass'] = chunks[1:] if old else chunks[2:]
                assert len(block['ass']) == samples
                apply_labels_to_vecs(block['adv'], block['interests'],
                                     block['newsv'], block['ass'], samples,
                                     treatments)
            elif head == 'Xvfbfailure':
                # Only the treatment field of the failure is recorded.
                block['xf'].append(chunks[1])
            elif tag == 'breakingout':
                block['break'] = True
            elif tag == 'loadtime':
                t = datetime.strptime(chunks[2], "%H:%M:%S.%f")
                # The sub-second part of the parsed time is discarded.
                delta = timedelta(hours=t.hour,
                                  minutes=t.minute,
                                  seconds=t.second)
                unit = int(chunks[3])
                block['loadtimes'][unit] += delta
            elif tag == 'reload':
                block['reloads'][int(chunks[2])] += 1
            elif tag == 'errorcollecting':
                block['errors'][int(chunks[2])] += 1
            elif tag == 'prepref':
                block['interests'][int(chunks[4])].remove_interest()
            elif tag == 'pref':
                unit = int(chunks[4])
                block['interests'][unit].set_from_string(chunks[3])
            elif head == 'news':
                ind_news = news.News({
                    'Time': datetime.strptime(chunks[3], stamp_format),
                    'Title': chunks[4],
                    'Agency': chunks[5],
                    'Ago': chunks[6],
                    'Body': chunks[7].rstrip(),
                    'Label': chunks[2]
                })
                block['newsv'][int(chunks[1])].add(ind_news)
            elif head == 'ad':
                ind_ad = ad.Ad({
                    'Time': datetime.strptime(chunks[3], stamp_format),
                    'Title': chunks[4],
                    'URL': chunks[5],
                    'Body': chunks[6].rstrip(),
                    'cat': "",
                    'Label': chunks[2]
                })
                block['adv'][int(chunks[1])].add(ind_ad)
            else:  # to analyze old log files
                try:
                    # NOTE(review): this record uses the key 'label' while
                    # the other records use 'Label' - confirm against ad.Ad.
                    ind_ad = ad.Ad({
                        'Time': datetime.strptime(chunks[2], stamp_format),
                        'Title': chunks[3],
                        'URL': chunks[4],
                        'Body': chunks[5].rstrip(),
                        'cat': "",
                        'label': chunks[1]
                    })
                    block['adv'][int(chunks[0])].add(ind_ad)
                except Exception:
                    # Best effort: anything that is not a parsable old-style
                    # ad line (headers, blank lines, noise) is skipped.
                    # Unlike a bare except, this lets KeyboardInterrupt and
                    # SystemExit propagate.
                    pass

    # The last block has no closing marker, so it is stored here.
    par_adv.append(block)
    sys.stdout.write(".Scanning complete\n")
    sys.stdout.flush()
    return par_adv, treatnames


def _new_log_block(samples):
    """Return an empty per-block record sized for ``samples`` agents."""
    adv = []
    ints = []
    newsv = []
    for _ in range(samples):
        adv.append(adVector.AdVector())
        ints.append(interest.Interests())
        newsv.append(news.NewsVector())
    return {
        'adv': adv,
        'newsv': newsv,
        'ass': [],
        'xf': [],
        'interests': ints,
        'break': False,
        'loadtimes': [timedelta(minutes=0)] * samples,
        'reloads': [0] * samples,
        'errors': [0] * samples
    }