Example #1
0
def read_log(log_file):
    """Parse a block-structured log file into per-block measurement records.

    Every line is decoded with ``interpret_log_line`` and dispatched on its
    line type and line name:

    * ``meta`` / ``agents``: number of agents per block.
    * ``meta`` / ``treatnames``: treatment names, separated by ``@|``.
    * ``meta`` / ``block_id start``: allocate one fresh ad vector, interest
      set and news vector per agent.
    * ``meta`` / ``assignment``: integer assignment per agent, separated
      by ``@|``.
    * ``meta`` / ``block_id end``: label the vectors via
      ``apply_labels_to_vecs`` and store the finished block.
    * ``measurement`` / ``ad`` | ``interest`` | ``news``: add the measured
      value to the vector of the unit given by ``unit_id``.

    Lines of any other type (including ``treatment`` and ``error``) are
    ignored.

    Args:
        log_file: Path of the log file to read.

    Returns:
        A tuple ``(par_adv, treatnames)``. ``par_adv`` is a list with one
        dict per completed block, holding the keys ``'advector'``,
        ``'newsvector'``, ``'assignment'`` and ``'intvector'``.
        ``treatnames`` is the list of treatment names, or ``[]`` when the
        log contains no ``treatnames`` line.
    """
    par_adv = []
    # Default so that a log without a 'treatnames' line (e.g. an empty file)
    # yields an empty result instead of failing on an unbound name below.
    treatnames = []
    sys.stdout.write("Reading log")
    with open(log_file, "r") as fo:
        for line in fo:
            (tim, linetype, linename, value, unit_id,
             treatment_id) = interpret_log_line(line)
            if linetype == 'meta':
                if linename == 'agents':
                    num_agents = int(value)
                elif linename == 'treatnames':
                    treatnames = re.split(r"\@\|", value)
                elif linename == 'block_id start':
                    # One progress dot per block.
                    sys.stdout.write(".")
                    sys.stdout.flush()
                    # The id is not used further; it is still parsed so a
                    # non-integer block id keeps raising ValueError.
                    block_id = int(value)
                    adv = []
                    ints = []
                    newsv = []
                    for _ in range(0, num_agents):
                        adv.append(adVector.AdVector())
                        ints.append(interest.Interests())
                        newsv.append(news.NewsVector())
                elif linename == 'assignment':
                    assignment = [int(x) for x in re.split(r"\@\|", value)]
                elif linename == 'block_id end':
                    apply_labels_to_vecs(adv, ints, newsv, assignment,
                                         num_agents, len(treatnames))
                    par_adv.append({
                        'advector': adv,
                        'newsvector': newsv,
                        'assignment': assignment,
                        'intvector': ints
                    })
            elif linetype == 'measurement':
                if linename == 'ad':
                    adv[int(unit_id)].add(ad.Ad(value, treatment_id))
                elif linename == 'interest':
                    ints[int(unit_id)].set_from_string(value)
                elif linename == 'news':
                    newsv[int(unit_id)].add(news.News(value, treatment_id))
            # 'treatment' and 'error' lines carry nothing to collect here.
    sys.stdout.write(".Reading complete\n")
    # Same bytes as the former `print "Treatments: ", treatnames`, written
    # through sys.stdout so the function parses on Python 2 and Python 3.
    sys.stdout.write("Treatments:  %s\n" % (treatnames,))
    return par_adv, treatnames
Example #2
0
def interest_vectors(list):
    """Build one interest vector per item against the union of all items.

    Starting from an empty ``interest.Interests()``, every item of ``list``
    is merged in with ``union``. Each item is then passed to the union's
    ``gen_int_vec`` and its ``label`` attribute is collected.

    Args:
        list: Iterable of interest objects; it is traversed twice.

    Returns:
        A tuple ``(vectors, labels, union)``: the ``gen_int_vec`` result for
        each item, each item's ``label``, and the merged interest object.
    """
    merged = interest.Interests()
    for entry in list:
        merged = merged.union(entry)

    # Vector and label are produced together per item, in input order.
    pairs = [(merged.gen_int_vec(entry), entry.label) for entry in list]
    vectors = [vec for vec, _ in pairs]
    tags = [tag for _, tag in pairs]
    return vectors, tags, merged
Example #3
0
def _new_block(samples):
    """Return an empty per-block record sized for `samples` units."""
    adv = []
    ints = []
    newsv = []
    for _ in range(0, samples):
        adv.append(adVector.AdVector())
        ints.append(interest.Interests())
        newsv.append(news.NewsVector())
    return {
        'adv': adv,
        'newsv': newsv,
        'ass': [],
        'xf': [],
        'interests': ints,
        'break': False,
        'loadtimes': [timedelta(minutes=0)] * samples,
        'reloads': [0] * samples,
        'errors': [0] * samples
    }


def _read_log_header(log_file):
    """Read the leading line(s) of `log_file` and return its layout.

    Returns a tuple ``(old, gmarker, treatments, samples, treatnames)``.
    """
    treatnames = []
    with open(log_file, "r") as fo:
        chunks = re.split(r"\|\|", fo.readline())
        if chunks[0] == 'g':
            # Old layout: no header, the first line is already a block
            # marker line with one field per sample after the 'g' tag.
            old = True
            gmarker = 'g'
            treatments = 2
            treatnames = ['0', '1']
            samples = len(chunks) - 1
        else:
            # New layout: counts on the first line, names on the second.
            old = False
            gmarker = 'assign'
            treatments = int(chunks[2])
            samples = int(chunks[1])
            chunks = re.split(r"\|\|", fo.readline())
            for name in chunks[1:]:
                treatnames.append(name.strip())
    return old, gmarker, treatments, samples, treatnames


def read_log(log_file):  # check
    """Parse a ``||``-separated log file into per-block records.

    The file is read twice. The first pass looks at the leading line(s) to
    determine the layout: when the first field of the first line is ``'g'``
    the log is handled as the old layout (treatments ``'0'`` and ``'1'``,
    block marker ``'g'``); otherwise the sample and treatment counts are
    taken from fields 1 and 2 of the first line, the treatment names from
    the second line, and the block marker is ``'assign'``.

    The second pass starts a new block at every block-marker line and adds
    the remaining lines (Xvfb failures, break-outs, load times, reloads,
    collection errors, preferences, news items and ads) to the current
    block. Lines matching no known record are tried as old-format ad lines
    and silently skipped when that fails.

    Args:
        log_file: Path of the log file to read.

    Returns:
        A tuple ``(par_adv, treatnames)``. ``par_adv`` is a list with one
        dict per block, holding the keys ``'adv'``, ``'newsv'``, ``'ass'``,
        ``'xf'``, ``'interests'``, ``'break'``, ``'loadtimes'``,
        ``'reloads'`` and ``'errors'``. ``treatnames`` is the list of
        treatment names.

    Raises:
        AssertionError: If the number of treatment names differs from the
            declared treatment count, or a block-marker line does not carry
            exactly one assignment per sample.
    """
    old, gmarker, treatments, samples, treatnames = _read_log_header(log_file)
    assert treatments == len(treatnames)
    for i in range(0, treatments):
        # Same bytes as the former `print "Treatment ", i, " = ", name`,
        # written through sys.stdout so this parses on Python 2 and 3.
        sys.stdout.write("Treatment  %s  =  %s\n" % (i, treatnames[i]))

    block = _new_block(samples)
    par_adv = []
    seen_marker = False

    with open(log_file, "r") as fo:
        sys.stdout.write("Scanning ads")
        for line in fo:
            chunks = re.split(r"\|\|", line)
            chunks[-1] = chunks[-1].rstrip()
            tag = chunks[0]
            # Single-field lines (e.g. blank lines) have no second field;
            # they fall through to the best-effort branch at the bottom
            # instead of raising IndexError.
            kind = chunks[1] if len(chunks) > 1 else None

            if tag == gmarker:
                if seen_marker:
                    # A further marker closes the block collected so far.
                    par_adv.append(block)
                    sys.stdout.write(".")
                    sys.stdout.flush()
                    block = _new_block(samples)
                seen_marker = True
                ass = chunks[1:] if old else chunks[2:]
                assert len(ass) == samples
                block['ass'] = ass
                apply_labels_to_vecs(block['adv'], block['interests'],
                                     block['newsv'], ass, samples, treatments)
            elif tag == 'Xvfbfailure':
                block['xf'].append(chunks[1])
            elif kind == 'breakingout':
                block['break'] = True
            elif kind == 'loadtime':
                t = datetime.strptime(chunks[2], "%H:%M:%S.%f")
                # Only whole seconds are accumulated; the fractional part
                # of the parsed time is dropped.
                delta = timedelta(hours=t.hour,
                                  minutes=t.minute,
                                  seconds=t.second)
                block['loadtimes'][int(chunks[3])] += delta
            elif kind == 'reload':
                block['reloads'][int(chunks[2])] += 1
            elif kind == 'errorcollecting':
                block['errors'][int(chunks[2])] += 1
            elif kind == 'prepref':
                block['interests'][int(chunks[4])].remove_interest()
            elif kind == 'pref':
                block['interests'][int(chunks[4])].set_from_string(chunks[3])
            elif tag == 'news':
                ind_news = news.News({
                    'Time':
                    datetime.strptime(chunks[3], "%Y-%m-%d %H:%M:%S.%f"),
                    'Title':
                    chunks[4],
                    'Agency':
                    chunks[5],
                    'Ago':
                    chunks[6],
                    'Body':
                    chunks[7].rstrip(),
                    'Label':
                    chunks[2]
                })
                block['newsv'][int(chunks[1])].add(ind_news)
            elif tag == 'ad':
                ind_ad = ad.Ad({
                    'Time':
                    datetime.strptime(chunks[3], "%Y-%m-%d %H:%M:%S.%f"),
                    'Title':
                    chunks[4],
                    'URL':
                    chunks[5],
                    'Body':
                    chunks[6].rstrip(),
                    'cat':
                    "",
                    'Label':
                    chunks[2]
                })
                block['adv'][int(chunks[1])].add(ind_ad)
            else:  # to analyze old log files
                # Best effort: anything that is not a well-formed old-style
                # ad line is skipped. Only Exception is swallowed so that
                # KeyboardInterrupt / SystemExit still propagate.
                # NOTE(review): this branch passes the key 'label' while the
                # 'ad' branch above passes 'Label' - confirm which ad.Ad
                # expects.
                try:
                    ind_ad = ad.Ad({
                        'Time':
                        datetime.strptime(chunks[2], "%Y-%m-%d %H:%M:%S.%f"),
                        'Title':
                        chunks[3],
                        'URL':
                        chunks[4],
                        'Body':
                        chunks[5].rstrip(),
                        'cat':
                        "",
                        'label':
                        chunks[1]
                    })
                    block['adv'][int(chunks[0])].add(ind_ad)
                except Exception:
                    pass

    # The last block has no closing marker, so store it explicitly.
    par_adv.append(block)
    sys.stdout.write(".Scanning complete\n")
    sys.stdout.flush()
    return par_adv, treatnames