Example #1
def scrape_date(soup):
    """
    Scrapes the publication date of a website using time HTML tag.
    """
    log("Scraping publication date.")
    # Fallback in case Wayback has no record of the page
    pub_date = soup.find('time')
    if pub_date is None:
        return None
    pub_date = pub_date.get('datetime')
    pub_date = iso_to_date(pub_date)
    # Get rid of timezone info
    pub_date = pub_date.replace(tzinfo=None)
    return pub_date
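
A minimal usage sketch for scrape_date, assuming log is a print-style helper and iso_to_date parses ISO-8601 strings (both are assumptions; neither helper is shown in this example):

from datetime import datetime
from bs4 import BeautifulSoup

def log(msg):
    print(msg)

def iso_to_date(value):
    # assumption: the project's helper parses ISO-8601 datetime strings
    return datetime.fromisoformat(value)

html = '<time datetime="2021-05-01T12:00:00+00:00">May 1, 2021</time>'
soup = BeautifulSoup(html, 'html.parser')
print(scrape_date(soup))  # 2021-05-01 12:00:00, timezone info stripped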
Example #2
 def __init__(self, develop_mode: bool):
     """
     :param develop_mode: force debug mode
     :type develop_mode: bool
     """
     self.message_forward_data = dict()
     self.__develop_mode = develop_mode
     self.logger = log('forward', 'forward.log', 'INFO')
Example #3
File: train.py Project: rbtsbg/lrv
def validate(xgcn, dataloader, device):
    """Determines performance of xgcn model on the data provided by dataloader."""

    log('Validating...')
    xgcn.eval()
    xgcn.to(device)
    outputs = None
    targets = None
    for (embeddings, adjacencies, labels) in tqdm(dataloader):
        embeddings = embeddings.to(device)
        adjacencies = adjacencies.to(device)
        labels = labels.to(device)
        if targets is None:
            targets = labels
        else:
            targets = torch.cat((targets, labels))

        output = xgcn(embeddings, adjacencies)
        output = torch.argmax(output, dim=1)

        if outputs is None:
            outputs = output
        else:
            outputs = torch.cat((outputs, output))

    outputs = outputs.tolist()
    targets = targets.tolist()

    # note: the original code re-zipped outputs and targets here, which is a
    # no-op; both are already aligned Python lists at this point

    f_score_micro = f1_score(y_pred=outputs, y_true=targets, average='micro')
    f_score_macro = f1_score(y_pred=outputs, y_true=targets, average='macro')
    f_score_weighted = f1_score(y_pred=outputs,
                                y_true=targets,
                                average='weighted')
    log('...done validating.')

    return {
        'micro': f_score_micro,
        'macro': f_score_macro,
        'weighted': f_score_weighted
    }
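
The accumulation pattern in validate() (start from None, then torch.cat each new batch along dimension 0) can be isolated in a few lines; a minimal, self-contained sketch:

import torch

collected = None
for batch in (torch.tensor([1, 2]), torch.tensor([3]), torch.tensor([4, 5])):
    # same pattern as the outputs/targets accumulation above
    collected = batch if collected is None else torch.cat((collected, batch))
print(collected.tolist())  # [1, 2, 3, 4, 5]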
Example #4
 def __init__(self, is_get_data: bool = True):
     self.admins_list: list = []
     self.users_ban_list: list = []
     self.users_count: int = 0
     self.isClear: bool = True
     self.isUpdate: bool = False
     self.data: dict = {}
     self.logger = log('Cache', 'cache.log', 'INFO')
     if is_get_data:
         self.get_data()
Example #5
def send_birthday_emails(request):
    tz = pytz.timezone(TIME_ZONE)
    now = datetime.datetime.now(tz)

    todays_birthday_persons = Person.objects\
        .filter(birth_date__month=now.month,
                birth_date__day=now.day)\
        .all()

    if todays_birthday_persons.count() == 0:
        return HttpResponse('no birthdays today.')
    else:
        admin = EmailMaster.objects.first()
        msg = ''
        for person in todays_birthday_persons:
            if person.last_birthday_email_sent_on_year < now.year:

                data = get_template_params(person)

                template = render_to_string('polls/happy_birthday_email.html',
                                            data)

                send_email(admin.email, person.email, 'Happy birthday!',
                           template)

                send_email(
                    admin.email, admin.email,
                    'BIRTHDAY: {} on {}'.format(person, person.birth_date), '')

                person.last_birthday_email_sent_on_year = now.year
                person.save()
                msg += 'Happy Birthday, {}! '.format(person)

                log('Birthday email for {} was sent at {}. '.format(
                    person, now))
            else:
                msg += 'Birthday email for {} was already sent. <br />'.format(
                    person)
        return HttpResponse(msg)
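
For reference, a small sketch of the timezone-aware "now" used above; the TIME_ZONE value here is an assumption, since the actual setting is not shown in this example:

import datetime
import pytz

TIME_ZONE = 'Europe/Berlin'  # assumed zone name, for illustration
tz = pytz.timezone(TIME_ZONE)
now = datetime.datetime.now(tz)
print(now.month, now.day)  # the values matched against birth_date above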
Example #6
File: postprocess.py Project: rbtsbg/lrv
def to_latex(path_in, path_out, weight, base, max_seq_len=-1, crop=-1):
    all_explanations = []
    with open(path_in, 'r') as fin:
        for jsonl in tqdm(fin):
            # stop early (very large outputs can exhaust TeX resources)
            if 0 < crop < len(all_explanations):
                log('Reached max number of explanations.')
                break
            jsonl = ujson.loads(jsonl)

            nodes = jsonl['graph']['nodes']
            if max_seq_len > 0 and len(nodes) > max_seq_len:
                log(f"Skipping line because max seq length exceeded.")
                continue
            edges = jsonl['graph']['edges']

            label_true = jsonl['label']
            label_pred = jsonl['prediction']['label']

            graph = XGraph()
            for node in nodes:
                graph.add_node(XNode(id=node['id'], label=node['label'], type='TOKEN'))

            for edge in edges:
                graph.add_edge(graph.get_node(edge['source']), graph.get_node(edge['target']), t=edge['type'])

            # collect the relevance flow through the layers
            relevance_flow = jsonl['relevance_flow']

            _, explanations = normalize_explanations(graph=graph,
                                                     relevance_flow=relevance_flow,
                                                     true_label=label_true,
                                                     predicted_label=label_pred)
            all_explanations = all_explanations + explanations

    latex = explanations_to_latex(explanations=all_explanations, weight=weight, base=base)
    with open(path_out, 'w') as fout:
        fout.write(latex)
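
The read loop above follows the JSON-lines convention: one JSON object per line. A minimal, self-contained sketch of that convention (the file name is hypothetical):

import ujson  # same library as above; the stdlib json module would also work

with open('graphs.jsonl', 'w') as f:
    f.write(ujson.dumps({'label': 1}) + '\n')
    f.write(ujson.dumps({'label': 2}) + '\n')

with open('graphs.jsonl', 'r') as fin:
    for raw in fin:
        record = ujson.loads(raw)
        print(record['label'])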
Example #7
File: hooks.py Project: damo-da/birthday
def cb(sender, instance, *args, **kwargs):
    log('Saving {}'.format(str(instance)))

    if instance.hero is not None and instance.hero.gender != instance.gender:
        log('Hero is of the wrong gender. Removing hero.')
        instance.hero = None

    if instance.hero is None:
        log('Assigning a random hero.')
        instance.hero = get_random_superhero(instance.gender)
Example #8
def preprocess_pubmed(path, to_lower, language_model):
    """Prepocesses PubMed file into list of XGraph objects."""
    pattern = "###[0-9]+$"
    pattern = re.compile(pattern)

    # derive the output path from the input path to avoid mixing up file names
    path_out = path.replace('.txt', '.p')

    f_in = open(path, 'r')
    lines = f_in.readlines()
    graphs = []
    nlp = spacy.load(language_model,
                     disable=[
                         'tagger', 'ner', 'textcat', 'entity_ruler',
                         'sentencizer', 'merge_noun_chunks', 'merge_entities',
                         'merge_subtokens'
                     ])

    written = 0
    discarded = 0
    for line in lines:
        line = line.strip()
        if len(line) == 0 or pattern.match(line.strip()):
            discarded = discarded + 1
            continue
        label, graph = line_to_graph(line.strip(), nlp, to_lower=to_lower)
        graphs.append((label, graph))
        written = written + 1
        if written % 1000 == 999:
            log('Processed {} lines'.format(written + 1))

    f_in.close()

    log("Wrote {} graphs from {} to {}, discarded {} lines.".format(
        written, path, path_out, discarded))

    log("Pickling to {}...".format(path_out))
    pickle.dump(graphs, open(path_out, 'wb'))
    log("...done pickling.")

    return path_out
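
A quick check of the PubMed delimiter pattern used above: lines such as '###12345' mark document boundaries and are discarded, while data lines are kept:

import re

pattern = re.compile(r"###[0-9]+$")
print(bool(pattern.match('###24491034')))               # True  -> discarded
print(bool(pattern.match('Patients were randomized.')))  # False -> kept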
Example #9
File: train.py Project: rbtsbg/lrv
def train(loader_train, loader_dev, path_model, epochs, batch_size, pad, nfeat,
          nhid, patience, metric, random_seed):
    """Trains an GCN."""

    nclasses = 5
    assert metric in ["weighted", "macro", "micro"]

    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(random_seed)
        log("Set random cuda seed to {}".format(random_seed))
    else:
        torch.manual_seed(random_seed)
        log("Set manual seed to {}".format(random_seed))

    log("Training on device {}".format(device))

    xgcn = XGCN(nfeat=nfeat, nhid=nhid, nclass=nclasses, pad=pad, bias=None)
    xgcn.to(device)
    print(xgcn)
    optimizer = Adam(params=xgcn.parameters())  # todo pass as argument

    scores = validate(xgcn=xgcn, dataloader=loader_dev, device=device)
    report(epoch=0, split="Dev", scores=scores)

    torch.save(xgcn.state_dict(), path_model)
    log("Saved initial model to {}.".format(path_model))

    wait = 0
    score_last = float('-inf')

    running_loss = 0.0
    for epoch in range(epochs):
        xgcn.train()
        for batch_idx, (embeddings, adjacencies,
                        labels) in enumerate(loader_train):

            embeddings = embeddings.to(device)
            adjacencies = adjacencies.to(device)
            labels = labels.to(device)

            optimizer.zero_grad()
            preds = xgcn(embeddings, adjacencies)
            loss = F.nll_loss(preds, labels)
            loss.backward()
            optimizer.step()
            xgcn.xfc.weight.data.clamp_(0)

            # print statistics
            running_loss += loss.item()
            if batch_idx % 10 == 9:
                log('[%d, %5d, %5d] loss: %.3f' %
                    (epoch + 1, batch_idx + 1,
                     (batch_idx + 1) * batch_size, running_loss / 10))
                running_loss = 0.0

        scores = validate(xgcn=xgcn, dataloader=loader_dev, device=device)
        report(epoch=epoch + 1, split="Dev", scores=scores)

        score_current = scores[metric]

        if score_current > score_last:
            torch.save(xgcn.state_dict(), path_model)
            log("{} score improved from {:.3f} to {:.3f}. Saved model to {}.".
                format(metric, score_last, score_current, path_model))
            score_last = score_current
            wait = 0
        else:
            wait = wait + 1
            if wait >= patience:
                log("Terminating training after {} epochs w/o improvement.".
                    format(wait))
                return xgcn

    # also return the model if all epochs complete without early stopping
    return xgcn
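
The patience logic in train() is standard early stopping; distilled into a self-contained sketch (the scores are made up):

best, wait, patience = float('-inf'), 0, 2
for epoch, score in enumerate([0.50, 0.60, 0.55, 0.58], start=1):
    if score > best:
        best, wait = score, 0  # improvement: record it and reset the counter
    else:
        wait += 1
        if wait >= patience:
            print(f'stopping at epoch {epoch}, best score {best}')
            break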
Example #10
File: train.py Project: rbtsbg/lrv
def report(epoch, split, scores):
    log("Epoch: {} Split: {} F-micro: {:.3f} F-macro: {:.3f} F-weighted: {:.3f}"
        .format(epoch, split, scores['micro'], scores['macro'],
                scores['weighted']))
Example #11
File: train.py Project: rbtsbg/lrv
        if score_current > score_last:
            torch.save(xgcn.state_dict(), path_model)
            log("{} score improved from {:.3f} to {:.3f}. Saved model to {}.".
                format(metric, score_last, score_current, path_model))
            score_last = score_current
            wait = 0
        else:
            wait = wait + 1
            if wait >= patience:
                log("Terminating training after {} epochs w/o improvement.".
                    format(wait))
                return xgcn


if __name__ == "__main__":
    log('Training...')
    cfg = config('./config.json')
    print(json.dumps(cfg, indent=2))

    parser = argparse.ArgumentParser()

    parser.add_argument(
        '--file_train_pickle',
        type=str,
        default=cfg['preprocessing']['pubmed']['file_train_pickle'])
    parser.add_argument(
        '--file_dev_pickle',
        type=str,
        default=cfg['preprocessing']['pubmed']['file_dev_pickle'])
    parser.add_argument(
        '--file_test_pickle',
Example #12
File: explain.py Project: rbtsbg/lrv
def explain(nfeat,
            nhid,
            padding,
            path_model,
            path_text,
            path_out,
            path_label2vec,
            lower_bound,
            upper_bound,
            language_model,
            to_lower,
            crop,
            do_occlude,
            drop=None,
            step=None,
            verbose=True):
    if do_occlude:
        assert drop is not None, 'Define drop range.'
        assert step is not None, 'Define step size.'
        # assert 0 < drop <= 1 - step, 'Drop range or step size outside of valid scope.'
    if crop > 0:
        warnings.warn("Cropping dataset.")

    assert lower_bound <= upper_bound, 'Lower bound greater than upper bound'

    CLASSES = PubMedDataset.classes()
    nclasses = len(CLASSES)

    # declare model
    xgcn = XGCN(nfeat, nhid, nclasses, padding, None)

    # load weights, assume model was trained on a GPU but when loading, map to current location
    device = 'cpu' if not torch.cuda.is_available() else 'cuda:0'
    xgcn.load_state_dict(torch.load(path_model, map_location=device))

    # pubmed data specific pattern used to identify lines that contain no data point
    pattern = "###[0-9]+$"
    pattern = re.compile(pattern)

    # spacy creates the dependency tree
    nlp = spacy.load(language_model)

    # label2vec dictionary, used to map node labels (i.e. tokens) onto embeddings
    label2vec = pickle.load(open(path_label2vec, 'rb'))

    # all graph embeddings need to be of the same size
    pad = Pad(padding=padding)

    with open(path_text, 'r') as fin:

        # lines processed
        line_counter = 0
        # each json line is self contained at the price of redundancy
        with open(path_out, 'w+') as fout:
            for line in tqdm(fin.readlines()):

                # if line contains no data point, skip
                if pattern.match(line) or len(line.strip()) == 0:
                    continue

                line_counter = line_counter + 1
                # crop > 0 limits how many lines are processed
                if 0 < crop <= line_counter:
                    log(f"Terminating after {line_counter} lines, due to crop of {crop}.")
                    break

                # disable dropouts
                xgcn.eval()

                # cache inputs during forward pass
                xgcn.set_explainable(True)

                # declare json line
                jsonl = dict()

                jsonl['line'] = line_counter

                # save config
                jsonl['padding'] = padding

                # save vocabulary path
                jsonl['label2vec'] = path_label2vec

                # save model state
                jsonl['model'] = {}
                jsonl['model']['path'] = path_model
                jsonl['model']['device'] = device
                jsonl['model']['architecture'] = repr(xgcn).strip().replace(
                    os.linesep, '')

                # save raw text
                jsonl['text'] = line.strip()

                # retrieve label and graph, save
                label, graph = line_to_graph(line=line,
                                             nlp=nlp,
                                             to_lower=to_lower)
                graph_json = graph.to_json()
                jsonl['graph'] = graph_json
                jsonl['label'] = label

                # perform forward pass, save resulting tensor
                jsonl['prediction'] = dict()
                e, a = graph.E(label2vec=label2vec), graph.A_tilde()
                x_sample = XSample(embedding=e, adjacency=a)
                x_sample = pad(x_sample)
                x_sample.to_tensor()
                e, a = x_sample.EMBEDDING, x_sample.ADJACENCY

                # since xgcn is in explainable mode, softmax layer should be deactivated
                pred_tensor = xgcn(embedding=e, adjacency=a)
                jsonl['prediction']['tensor'] = tensor_to_list(pred_tensor)

                # save label
                max_idx = pred_tensor.argmax()

                if pred_tensor[0][max_idx] <= 0:
                    warnings.warn(
                        f'Maximum output of GCN is <=0 (line {line_counter}), will ignore this data point.'
                    )
                    continue

                pred_label = CLASSES[max_idx.item()]
                jsonl['prediction']['label'] = pred_label

                # perform layerwise relevance propagation, save layerwise relevance
                R = torch.zeros_like(pred_tensor)
                R[0][max_idx] = pred_tensor[0][max_idx]
                _, relevance_flow = xgcn.relprop(R,
                                                 lower_bound=lower_bound,
                                                 higher_bound=upper_bound)
                jsonl['relevance_flow'] = relevance_flow

                if do_occlude:
                    jsonl = occlude(graph=graph,
                                    jsonl=jsonl,
                                    xgcn=xgcn,
                                    adjacency=a,
                                    embedding=e,
                                    drop=drop,
                                    step=step,
                                    padding=padding,
                                    verbose=verbose,
                                    line_counter=line_counter)

                fout.write((ujson.dumps(jsonl) + os.linesep))
                fout.flush()
            fout.close()

    return True
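
The prediction-to-label step above reduces to an argmax over the output tensor; a minimal sketch with an assumed CLASSES list (the real labels come from PubMedDataset.classes()):

import torch

CLASSES = ['A', 'B', 'C']  # assumption, for illustration only
pred_tensor = torch.tensor([[0.1, 0.7, 0.2]])
max_idx = pred_tensor.argmax()
print(CLASSES[max_idx.item()])  # 'B'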
Example #13
            log('Processed {} lines'.format(written + 1))

    f_in.close()

    log("Wrote {} graphs from {} to {}, discarded {} lines.".format(
        written, path, path_out, discarded))

    log("Pickling to {}...".format(path_out))
    pickle.dump(graphs, open(path_out, 'wb'))
    log("...done pickling.")

    return path_out


if __name__ == '__main__':
    log('Preprocessing...')

    cfg = config('./config.json')
    print(json.dumps(cfg, indent=2))

    parser = argparse.ArgumentParser()
    parser.add_argument('--preprocess_wordvectors',
                        type=bool,
                        default=cfg['preprocessing']['word_vectors']['doit'])
    parser.add_argument(
        '--vocab_size',
        type=int,
        default=cfg['preprocessing']['word_vectors']['vocab_size'])
    parser.add_argument(
        '--file_word2vec',
        type=str,
Example #14
from bot import create_bot_instance, hidden_forward, bot_cache
from helpers.log import log
import threading
import telebot
import flask
from time import sleep
from config import *


logger_main = log('main', 'main.log', 'WARNING')


def update_cache(timeout: int):
    try:
        while True:
            sleep(timeout)
            bot_cache.update_cache()
            hidden_forward.clear_data()
    except Exception as err:
        logger_main.warning(err.with_traceback(None))


def flask_init(bot_object):
    web_hook_app = flask.Flask(__name__)
    url_path = f"/{TOKEN}/"

    @web_hook_app.route('/', methods=['GET', 'HEAD'])
    def index():
        return ''

    @web_hook_app.route(url_path, methods=['POST'])
Example #15
File: explain.py Project: rbtsbg/lrv
                                    embedding=e,
                                    drop=drop,
                                    step=step,
                                    padding=padding,
                                    verbose=verbose,
                                    line_counter=line_counter)

                fout.write((ujson.dumps(jsonl) + os.linesep))
                fout.flush()
            fout.close()

    return True


if __name__ == '__main__':
    log('Explaining...')
    cfg = config('./config.json')

    print(ujson.dumps(cfg, indent=2))

    parser = argparse.ArgumentParser()

    parser.add_argument('--nfeat', type=int, default=cfg['training']['nfeat'])
    parser.add_argument('--nhid', type=int, default=cfg['training']['nhid'])
    parser.add_argument('--path_model',
                        type=str,
                        default=cfg['training']['path_model'])
    parser.add_argument('--pad', type=int, default=cfg['training']['pad'])
    parser.add_argument(
        '--file_test_text',
        type=str,
Example #16
File: explain.py Project: rbtsbg/lrv
def occlude(graph, jsonl, xgcn, adjacency, embedding, drop, step, padding,
            verbose, line_counter):
    """Masks most and least relevant edges and tests model performance with masked adjancency matrix."""
    xgcn.eval()
    xgcn.set_explainable(True)

    CLASSES = PubMedDataset.classes()
    relevance_flow = jsonl['relevance_flow']

    # normalize relevances layerwise, save in Explanation objects
    graph, explanations = normalize_explanations(graph, relevance_flow)

    # for the first and second layer, determine how much relevance mass was carried by each edge during LRP
    def explanations_to_relevance_matrices(explanations):
        for explanation in explanations:
            if explanation.relevances_prior is not None:
                graph = copy.deepcopy(explanation.graph)
                rel_matrix = relevance_matrix(
                    graph=graph,
                    relevances_prior=explanation.relevances_prior,
                    relevances_now=explanation.relevances)
                yield rel_matrix

    # note: relevance matrices in row->col edge direction
    relevance_matrices = explanations_to_relevance_matrices(explanations)
    relevance_matrices = list(relevance_matrices)
    assert len(relevance_matrices) == 2, 'Sanity check failed.'

    # normalize relevance matrices along layer dimensions, note: in row-col direction
    global_normalized_relevance_matrix = (
        relevance_matrices[0] + relevance_matrices[1]) / np.sum(
            np.sum(relevance_matrices[0] + relevance_matrices[1]))
    if not (np.isclose(np.sum(np.sum(global_normalized_relevance_matrix)),
                       1.0)):
        warnings.warn(
            f"After normalization sum of weights not close to 1 in line {line_counter}."
        )

    # now retrieve the edge relevances and their positions in the adjacency matrix, note: row->col edge direction
    edge_relevances_and_positions = []
    for edge in graph.edges:
        position = (edge[0].id - 1, edge[1].id - 1)
        weight = global_normalized_relevance_matrix[position[0]][position[1]]
        edge_relevances_and_positions.append((weight, position))

    # the following are the edges - their positions - ordered by the relevance they carried, note: row->col direction
    edge_weights_and_positions = sorted(edge_relevances_and_positions,
                                        key=lambda tup: tup[0],
                                        reverse=True)

    # now occlude and record performance
    last_drop = None
    for ratio in np.arange(0., (drop + step), step=step):
        assert 0 <= ratio <= 1.0, 'Ratio out of range.'

        # mask for top k edges, note: in row->col direction
        mask_top, dropped_edges = get_mask(
            matrix=global_normalized_relevance_matrix,
            relevances_and_positions=edge_weights_and_positions,
            percentage=ratio)

        # do not repeat experiment with same number of edges dropped
        if last_drop is not None and last_drop == dropped_edges:
            continue
        else:
            last_drop = dropped_edges
        if verbose:
            log(f"Dropped {dropped_edges} weights of {len(edge_weights_and_positions)} at ratio {ratio}")

        mask_top = padmat(mask_top, padding, zeros=False)
        # since adjacency matrix in col->row direction, transpose mask, which is currently in row->col direction
        mask_top = np.transpose(mask_top)
        mask_top = torch.from_numpy(mask_top)

        # mask for bottom k edges, note: in row->col direction
        mask_bottom, dropped_edges_bottom = get_mask(
            matrix=global_normalized_relevance_matrix,
            relevances_and_positions=edge_weights_and_positions,
            percentage=ratio,
            top=False)

        mask_bottom = padmat(mask_bottom, padding, zeros=False)
        mask_bottom = np.transpose(mask_bottom)
        mask_bottom = torch.from_numpy(mask_bottom)

        assert (adjacency.size() == mask_top.size())
        assert (adjacency.size() == mask_bottom.size())

        # get new (masked) adjacency matrices
        a_masked_top = torch.mul(adjacency.double(), mask_top)
        a_masked_bottom = torch.mul(adjacency.double(), mask_bottom)

        # perform forward pass with new masked adjancency matrices
        pred_tensor_top = xgcn(embedding=embedding,
                               adjacency=a_masked_top.float())
        pred_tensor_bottom = xgcn(embedding=embedding,
                                  adjacency=a_masked_bottom.float())

        # determine predicted labels
        max_idx_top = pred_tensor_top.argmax()
        max_idx_bottom = pred_tensor_bottom.argmax()
        pred_label_top = CLASSES[max_idx_top.item()]
        pred_label_bottom = CLASSES[max_idx_bottom.item()]

        # sanity check: if nothing was occluded this should be the same prediction as in the original forward pass
        if ratio == 0:
            assert pred_label_top == jsonl['prediction'][
                'label'], "Different labels but drop ratio 0.0."
            assert pred_label_bottom == jsonl['prediction'][
                'label'], "Different labels but drop ratio 0.0."

        # save everything in the json line
        if 'occlusion' not in jsonl:
            jsonl['occlusion'] = {}

        jsonl['occlusion'][str(ratio)] = {}
        jsonl['occlusion'][str(ratio)]['dropped_edges'] = dropped_edges
        jsonl['occlusion'][str(ratio)]['top'] = {}
        jsonl['occlusion'][str(ratio)]['bottom'] = {}
        jsonl['occlusion'][str(ratio)]['top']['label'] = pred_label_top
        jsonl['occlusion'][str(ratio)]['bottom']['label'] = pred_label_bottom
        jsonl['occlusion'][str(ratio)]['top']['tensor'] = tensor_to_list(
            pred_tensor_top)
        jsonl['occlusion'][str(ratio)]['bottom']['tensor'] = tensor_to_list(
            pred_tensor_bottom)
    return jsonl
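
The occlusion itself is just an element-wise product of the adjacency matrix with a binary mask; a minimal sketch:

import torch

adjacency = torch.tensor([[0., 1.], [1., 0.]])
mask = torch.tensor([[1., 0.], [1., 1.]])  # drops the 0 -> 1 edge
print(torch.mul(adjacency, mask))  # tensor([[0., 0.], [1., 0.]])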
Example #17
import pymysql
from helpers.log import log
from helpers.utils import remove_emoji
from config import HOSTD, USER, PASS, DB

sql_log = log('sql', 'sql.log', 'ERROR')


def get_connection() -> pymysql.Connection:
    """
    Function for getting connection data
    :return: <pymysql.connections.Connection>
    """
    return pymysql.connections.Connection(host=HOSTD,
                                          user=USER,
                                          password=PASS,
                                          db=DB,
                                          charset='utf8mb4')
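
A hypothetical usage sketch for get_connection(), assuming the credentials in config point at a reachable MySQL server:

connection = get_connection()
try:
    with connection.cursor() as cursor:
        cursor.execute('SELECT 1')
        print(cursor.fetchone())  # (1,)
finally:
    connection.close()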


def add_user(user_id, first_name, last_name, username):
    """
    Function for adding a user to DB
    :param user_id: <int> - the id of the user
    :param first_name: <str> or <None> - the user's first name
    :param last_name: <str> or <None> - the user's last name
    :param username: <str> or <None> - the user's nickname
    :return: <bool>
    """
    connection = get_connection()
    first_name = remove_emoji(first_name)
Example #18
File: main.py Project: rbtsbg/lrv
import argparse
import json
import subprocess

from helpers.config import config
from helpers.log import log

if __name__ == '__main__':

    log('Pipeline invoked...')
    cfg = config('./config.json')

    parser = argparse.ArgumentParser()
    parser.add_argument('--preprocess', type=bool, default=cfg['pipeline']['preprocess'])
    parser.add_argument('--train', type=bool, default=cfg['pipeline']['train'])
    parser.add_argument('--explain', type=bool, default=cfg['pipeline']['explain'])
    parser.add_argument('--postprocess', type=bool, default=cfg['pipeline']['postprocess'])
    args = parser.parse_args()

    print(json.dumps(cfg, indent=2))

    if args.preprocess:
        subprocess.call(['python', 'preprocess.py'])

    if args.train:
        subprocess.call(['python', 'train.py'])

    if args.explain:
        subprocess.call(['python', 'explain.py'])
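
One behavioral note on the flag parsing above (an observation about argparse, not something stated in the source): type=bool applies bool() to the raw string, so any non-empty value passed on the command line parses as True.

print(bool('False'))  # True -- so '--train False' would still enable training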
Example #19
File: postprocess.py Project: rbtsbg/lrv
    parser.add_argument('--path_in_explanations_jsonl', type=str, default=cfg['explain']['file_explanations_jsonl'])
    parser.add_argument('--path_out_top_masked_predictions', type=str,
                        default=cfg['postprocess']['occlusion_experiment']['path_out_top_masked_predictions'])
    parser.add_argument('--path_out_bottom_masked_predictions', type=str,
                        default=cfg['postprocess']['occlusion_experiment']['path_out_bottom_masked_predictions'])
    parser.add_argument('--draw_plot', type=bool, default=cfg['postprocess']['occlusion_experiment']['draw_plot'])
    parser.add_argument('--do_convert_to_latex', type=bool, default=cfg['postprocess']['latex']['doit'])
    parser.add_argument('--path_out_latex', type=str, default=cfg['postprocess']['latex']['path_out_latex'])
    parser.add_argument('--max_seq_len', type=int, default=cfg['postprocess']['latex']['max_seq_len'])
    parser.add_argument('--weight', type=float, default=cfg['postprocess']['latex']['weight'])
    parser.add_argument('--base', type=float, default=cfg['postprocess']['latex']['base'])
    parser.add_argument('--crop', type=int, default=cfg['postprocess']['latex']['crop'])
    args = parser.parse_args()

    if args.draw_plot:  # matches the --draw_plot argument defined above
        log('Summarizing occlusion experiments...')
        top, bottom = read_explanations(args.path_in_explanations_jsonl)
        res_top, percentages = occlusion_predictions(top)
        res_bottom, percentages = occlusion_predictions(bottom)
        f1_top = [f1_score(t[0], t[1], average='weighted') for t in res_top]
        # convert to csv
        f1_top = list(zip(percentages, f1_top))
        f1_top = [f'{tup[0]},{tup[1]}' for tup in f1_top]
        f1_top = '\n'.join(f1_top)
        f1_bottom = [f1_score(b[0], b[1], average='weighted') for b in res_bottom]
        f1_bottom = list(zip(percentages, f1_bottom))
        f1_bottom = [f'{tup[0]},{tup[1]}' for tup in f1_bottom]
        f1_bottom = '\n'.join(f1_bottom)
        with open(args.path_out_top_masked_predictions, 'w+') as fout:
            fout.write(f1_top)
            fout.close()
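
The percentage-to-CSV conversion above boils down to a zip and a join; a self-contained sketch with made-up numbers:

percentages = [0.0, 0.1, 0.2]
f1_scores = [0.80, 0.74, 0.69]
rows = [f'{p},{s}' for p, s in zip(percentages, f1_scores)]
print('\n'.join(rows))
# 0.0,0.8
# 0.1,0.74
# 0.2,0.69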