def fromCSV(TimeData, dataPath1, dataPath2, dataName):
        """Prepares data of survival over time for model definition. Initialize
        from a csv file using class method fromCSV.

            Input:
            - dataPath1, dataPath2 (str): paths to csv files corresponding to the survival 
            over time of each group. The csv files should have the days of observations in 
            each column, starting from 0 (which will indicate how many hosts were challenged
            in each dose) in second column (the first column will have the doses). The last 
            column will indicate the last day of observation and the number of hosts which 
            survived each of the challenges. The results from each of the challenge doses 
            should be put in each line, starting from the second line (the first line will 
            have the days of observation). The doses and final day of observation should be 
            the same for both groups.

            - dataName (str): a text string which should have no spaces and should be 
            descriptive of the data. Will be used to name folder and files of saved results 
            (ex: 'survWolb2012'). 

            Returns a Data object. 
        """
        (timesDeath1,survivors1,tmax1,times1,doses1,ndoses1,nhosts1)=ut.readcsv(dataPath1)
        (timesDeath2,survivors2,tmax2,times2,doses2,ndoses2,nhosts2)=ut.readcsv(dataPath2)
        
        if not (tmax1 == tmax2 and sum(times1 == times2) == len(times1)):
            raise DataError("Times of observation not the same in two datasets, please check the data in %s and %s"%(dataPath1,dataPath2))
            
        if not (ndoses1 == ndoses2 and sum(doses1 == doses2) == ndoses1):
            raise DataError("Doses not the same in two datasets, please check the data in %s and %s"%(dataPath1,dataPath2))
        
        return TimeData(timesDeath1,timesDeath2,survivors1,survivors2,nhosts1,nhosts2,tmax1,times1,doses1,ndoses1,dataName,dataPath1,dataPath2)         
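A minimal usage sketch, based only on the docstring above; the csv layout, file names and dose values are assumptions, and the call passes TimeData explicitly to match the signature as written (in the original project this is presumably exposed as a classmethod).

# Hypothetical csv layout (first line = days of observation, first column = doses,
# day-0 column = number of hosts challenged at each dose):
#
#   dose,  0,  1,  2, ..., 14
#   1e3,  20, 19, 18, ..., 12
#   1e5,  20, 17, 15, ...,  5
#
# Hypothetical call (file names and data name are illustrative only):
# data = fromCSV(TimeData, 'group1.csv', 'group2.csv', 'survWolb2012')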
Example #2
def generate_report(date):
    script_path = os.path.dirname(os.path.abspath(__file__))
    csv_file = f'watch-{date}.csv'

    if not os.path.exists(os.path.join(script_path, 'csv_data', csv_file)):
        return False

    shutil.copy2(os.path.join(script_path, 'csv_data', csv_file),
                 os.path.join(script_path, 'web'))

    report_date = datetime.strptime(date, '%m-%d-%Y').strftime("%A %-d %B %Y")
    generation_date = datetime.now().strftime('%d %B %Y %H:%M')

    csv_data = readcsv(os.path.join(script_path, 'csv_data', csv_file))
    colors_data = readjson(os.path.join(script_path, 'web', 'colors.json'))

    json_data = json.dumps({
        'report_date': report_date,
        'generation_date': generation_date,
        'csv_file': csv_file,
        'csv_data': csv_data,
        'colors_data': colors_data
    })

    rendered = _render_template(json_data)
    report_filename = os.path.join(script_path, 'web', f'report-{date}.html')
    with open(report_filename, 'w') as f:
        f.write(rendered)

    return report_filename
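A hedged usage sketch; the date below is illustrative, and the csv is expected at <script dir>/csv_data/watch-<date>.csv as checked by the function above.

report = generate_report('03-14-2019')  # hypothetical date in %m-%d-%Y format
if report:
    print('report written to', report)
else:
    print('no csv data found for that date')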
Example #3
def load_eval_data(data_path, w2v_path, w2v_dim, sequence_length, num_classes,
                   labels):
    print("[LOADING] data: ", data_path)
    data = utils.readcsv(data_path)
    tagged_sents = []
    e1_list = []
    e2_list = []
    y_list = []
    for line in data:
        tagged_s, e1, e2 = utils.pos_tag(line[0])
        tagged_sents.append(tagged_s)
        e1_list.append(e1)
        e2_list.append(e2)
        one_hot_label = one_hot_encodding(labels.index(line[1]), num_classes)
        y_list.append(one_hot_label)

    bow, embed = const_bow(tagged_sents, w2v_path, w2v_dim)
    sentences = []
    en1_pos = []
    en2_pos = []

    for i in range(len(tagged_sents)):
        pos_tagged_sent = tagged_sents[i]
        e1 = e1_list[i]
        e2 = e2_list[i]

        tmp_sent = []
        tmp_pos1 = []
        tmp_pos2 = []

        for idx, token in enumerate(pos_tagged_sent):
            if token in bow:
                tmp_sent.append(bow[token])
                tmp_pos1.append(pos_embed(e1 - idx))
                tmp_pos2.append(pos_embed(e2 - idx))
        sent_len = len(pos_tagged_sent)
        while len(tmp_sent) < sequence_length:  # pad up to sequence_length
            tmp_sent.append(bow["<zero>"])
            tmp_pos1.append(122)
            tmp_pos2.append(122)
            sent_len += 1
        sentences.append(tmp_sent)
        en1_pos.append(tmp_pos1)
        en2_pos.append(tmp_pos2)
    return sentences, en1_pos, en2_pos, y_list, bow, embed
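A sketch of a call; every argument value below (paths, embedding size, sequence length and label set) is assumed for illustration and is not taken from the original project.

labels = ['no_relation', 'related']  # hypothetical label set
sents, pos1, pos2, y, bow, embed = load_eval_data(
    'data/eval.csv', 'w2v.bin', 100, 120, len(labels), labels)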
Example #4
def parse(path, label, output=None, recursive=None, clean=False):
    """
    Read csv to extract and parse html from single or multiple files and write results to .txt.
    @params:
        path      - Required : file or directory to parse (Str)
        label     - Required : csv column name in which the html is stored (Str)
        output    - Optional : output filename (use only if parsing single files) (Str)
        recursive - Optional : recursive execution for directories (Bool)
        clean     - Optional : preprocessing of the input with utils.clean (Bool)
    """
    output = output if output else path + '.html'

    if recursive:
        files = [f for f in os.listdir(path) if not f.startswith('.')]
        files = [f for f in files if f.endswith('.csv')]
        for index, filename in enumerate(files):
            parse(os.path.join(path, filename), label, None, False, clean)
            print('Parsed document ' + str(index + 1) + ' of ' + str(len(files)))

    else:
        utils.progressbar_update(0, path)
        read_size = 0
        html_file = open(output, 'w+')
        for index, row in enumerate(utils.readcsv(path)):
            read_size += row['size']
            row = row[label]
            regex = re.compile(r'[^\x00-\x7F]+', re.UNICODE)
            row = re.sub(regex, '', row)
            row = re.sub('[\n\t\r]', '', row)
            row = re.sub('<article', '\n<article', row)
            html_file.write(row)

            utils.progressbar_update(read_size, path)

        html_file.close()
        html_parse.parse(path=output,
                         output=None,
                         recursive=False,
                         clean=clean)
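Hypothetical invocations based on the docstring; the directory, column name and file names are illustrative only.

parse('dumps/', 'content_html', recursive=True, clean=True)        # every .csv in a directory
parse('articles.csv', 'content_html', output='articles.html')      # a single file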
Example #5
def load_agent_test_data(data_path, sequence_length, num_classes, vocab_,
                         labels, entity2_dict):
    print("[LOADING] data: ", data_path)
    data = utils.readcsv(data_path)
    sentences = []
    en1_pos = []
    en2_pos = []
    y_list = []
    entities = []
    for line in tqdm(data):
        sent = line[0]
        entity1 = re.findall("<e1>.*?</e1>", sent)[0]
        entity2 = re.findall("<e2>.*?</e2>", sent)[0]
        entity1 = entity1.replace("<e1>", "").replace("</e1>", "")
        entity2 = entity2.replace("<e2>", "").replace("</e2>", "")
        one_hot_label = one_hot_encodding(labels.index(line[1]), num_classes)
        pos_tagged_sent, e1, e2 = utils.pos_tag(sent)
        tmp_sent = []
        tmp_pos1 = []
        tmp_pos2 = []
        for idx, token in enumerate(pos_tagged_sent):
            if token in vocab_:
                tmp_sent.append(vocab_[token])
                tmp_pos1.append(pos_embed(e1 - idx))
                tmp_pos2.append(pos_embed(e2 - idx))
        sent_len = len(pos_tagged_sent)
        while len(tmp_sent) < sequence_length:  # pad up to sequence_length
            tmp_sent.append(vocab_["<zero>"])
            tmp_pos1.append(122)
            tmp_pos2.append(122)
            sent_len += 1
        sentences.append(tmp_sent)
        en1_pos.append(tmp_pos1)
        en2_pos.append(tmp_pos2)
        y_list.append(one_hot_label)
        entity_pair = (entity1, entity2)
        entities.append(entity_pair)
    Bags = constBag(entities, y_list, entity2_dict)
    return sentences, en1_pos, en2_pos, y_list, Bags
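An assumed call for illustration; vocab_, labels and entity2_dict would come from the training pipeline and are not defined in this excerpt, and the sequence length of 100 is a placeholder (the test path matches the one used in the evaluation script further below).

# sents, pos1, pos2, y, bags = load_agent_test_data(
#     '../data/ko/agent_test.csv', 100, len(labels), vocab_, labels, entity2_dict)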
Example #6
    predicted_y = []
    probs = []
    for batch in tqdm(range(total_batch_size)):
        st = batch * batch_size
        en = min((batch + 1) * batch_size, total_data_size)

        batch_sents = sents_list[st:en]
        batch_y = y_list[st:en]
        batch_pos1 = en1_position_list[st:en]
        batch_pos2 = en2_position_list[st:en]
        feed_dict = {
            cnn.input_text: batch_sents,
            cnn.input_y: batch_y,
            cnn.pos1: batch_pos1,
            cnn.pos2: batch_pos2
        }

        prediction,probabilities = sess.run([cnn.prediction,cnn.probabilities], feed_dict=feed_dict)
        for i in range(len(batch_sents)):
            predicted_y.append(properties_list[prediction[i]])
            probs.append(probabilities[i][prediction[i]])


result = []
eval_data = utils.readcsv(test_data)
for i, line in enumerate(eval_data):
    w = [line[0], predicted_y[i], line[2], line[3], line[4], probs[i]]
    result.append(w)

utils.writecsv(output_path,result)
Example #7

     plugload_name = sys.argv[0]
     state_fname = sys.argv[1]

     postgresops.check_evil(plugload_name)
     plugload_row = devicedb.get_devicemetas(where="key='PLUGLOAD' and value='%s'" % plugload_name, limit=1)
     if len(plugload_row) == 0:
         print("Cannot find plugload metadata for " + plugload_name)
         sys.exit(1)

     plugload_id = plugload_row[0].ID
 else:
     print("Not enough (or too many) input arguments")
     sys.exit(1)

 if dtype == "GHMM":
     data = utils.readcsv(state_fname)
     if len(data) == 0:
         print("No data in file")
         sys.exit(1)
     elif len(data[0]) != 4:
         print("GHMM data file must be CSV with format \"state_no,counts,mean,variance\"")
         sys.exit(1)

     print("Read %d states" % len(data))

     if not pretend:
         import learningdb
         learningdb.connect()
         learningdb.initdb()

         learningdb.insertHMMGaussianEmissions(plugload_id, fromtime, totime,
                                               [i[0] for i in data], [i[1] for i in data],
                                               [i[2] for i in data], [i[3] for i in data])
Example #8

from sklearn.metrics import classification_report, precision_recall_fscore_support, accuracy_score

import utils


def rel_data(gold_data, pred_data):
    dict_data = {}
    for g_line, p_line in zip(gold_data, pred_data):
        rel = g_line[1]
        if rel in dict_data:
            dict_data[rel]["gold_data"].append(g_line[-1])
            dict_data[rel]["pred_data"].append(p_line[-1])
        else:
            dict_data[rel] = {}
            dict_data[rel]["gold_data"] = [g_line[-1]]
            dict_data[rel]["pred_data"] = [p_line[-1]]
    return dict_data


goldfile = "../data/ko/agent_test.csv"
predfile = "../result/agent_target_addBOTH_44_result.csv"

gold_data = utils.readcsv(goldfile)
pred_data = utils.readcsv(predfile)

gold_tags = exTag(gold_data)
pred_tags = exTag(pred_data)
print(classification_report(gold_tags, pred_tags, digits=4))
macro_p, macro_r, macro_f1, _ = precision_recall_fscore_support(
    gold_tags, pred_tags, average="macro")
micro_p, micro_r, micro_f1, _ = precision_recall_fscore_support(
    gold_tags, pred_tags, average="micro")
acc = accuracy_score(gold_tags, pred_tags)
print("Macro: {:.4f}\t{:.4f}\t{:.4f}".format(macro_p, macro_r, macro_f1))
print("Micro: {:.4f}\t{:.4f}\t{:.4f}".format(micro_p, micro_r, micro_f1))
print("acc: {:.4f}".format(acc))

dict_data = rel_data(gold_data, pred_data)
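A possible follow-up using the per-relation split built above; this loop is a sketch added for illustration and is not part of the original script.

for rel, d in dict_data.items():
    p, r, f1, _ = precision_recall_fscore_support(
        d["gold_data"], d["pred_data"], average="micro")
    print("{}: P={:.4f} R={:.4f} F1={:.4f} (n={})".format(rel, p, r, f1, len(d["gold_data"])))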
Example #9
import numpy as np
import os
import sys
cwd = os.getcwd()
utilspath = cwd + '/../utils/'
sys.path.append(utilspath)
import utils
basefolder = cwd + '/../../data/filter/'

# first get the LNA gain:
datafolder2 = '../../data/gain2/'
snarray = ['022966','022967','022968','022969','022970','023230','023231','023232']
lnagain = []
for sn in snarray:
    fgainname = datafolder2 + 'wenteqS21' + sn + '.csv'
    data = utils.readcsv(fgainname)
    lnagain.append(data)


# get the on board filter data (CBP-B1230C+_Plus25DegC.s2p)
fref = 1400  # MHz, frequency to normalize to (resistor calibration was done at this frequency)
fname = '/CBP-B1230C+_Plus25DegC.s2p'
boardfilter = utils.readminicircuitdata(basefolder + fname) # (return [freq,gain])
attenatfref = np.interp([fref],boardfilter[0],boardfilter[1])
boardfilter[1] = boardfilter[1] - attenatfref # normalize the gain to the reference frequency

# get the bias-T filter from data
measbiast = utils.getjacquesmeas(basefolder+'/scope_42.csv',basefolder+'/scope_43.csv')

# get the cable loss from data
meascable = utils.getjacquesmeas(basefolder+'/scope_43.csv',basefolder+'/scope_44.csv')