Example #1
    def __init__(self, prefix):
        discord.ext.commands.Bot.__init__(self, command_prefix=prefix)
        self.http_client = urllib3.PoolManager()
        # Load the checkpoint, vocabulary and model once at start-up and put the model in eval mode.
        self.checkpoint = load_checkpoint('model.tar', 'cpu')
        self.vocabulary = load_vocabulary(self.checkpoint)
        self.model = load_model(self.checkpoint, self.vocabulary)
        self.model.eval()
        self.add_commands()
Example #2
def evaluate_fine_tuned_model(saved_model_dir):
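    """Average the per-class accuracy of the fine-tuned models across every run in saved_model_dir."""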
    mapping = {}

    for loss in EXP_LOSS:
        cp.print_warning("loss function : ", loss)
        summed_acc = [0] * len(TARGET_CLASS)

        model_dir = os.path.join(saved_model_dir, loss)
        for i in os.listdir(model_dir):
            fine_tuned_model_dir = os.path.join(
                saved_model_dir, loss, i, '{}_fine_tune'.format(EXP_TYPE))

            for c in os.listdir(fine_tuned_model_dir):
                class_model_dir = os.path.join(fine_tuned_model_dir, c)
                latest_model = max(os.listdir(class_model_dir))
                fine_tuned_model = os.path.join(class_model_dir, latest_model,
                                                'model_best.pth')
                cp.print_warning("fine tuned model : ", fine_tuned_model)

                if not torch.cuda.is_available():
                    config = torch.load(fine_tuned_model,
                                        map_location='cpu')['config']
                else:
                    config = torch.load(fine_tuned_model)['config']

                target_class = [int(c)]
                config['model']['args']['num_classes'] = len(target_class) + 1

                # if "media" not in config['data_loader']['args']['data_dir']:
                #     config['data_loader']['args']['data_dir'] = "/media/brandon/SSD" + config['data_loader']['args']['data_dir']

                model, data_loader, loss_fn, metrics = evaluate.load_model(
                    config, fine_tuned_model, target_class)

                log = evaluate.evaluate(model, data_loader, loss_fn, metrics)

                summed_acc[int(c)] += log['pred_acc']

        mapping[loss] = {}
        mapping[loss]['average_accuracy'] = []
        num_model = len(os.listdir(model_dir))

        for acc in summed_acc:
            mapping[loss]['average_accuracy'].append(
                round((acc / num_model) * 100, 2))
        mapping[loss]['summed_accuracy'] = summed_acc

        cp.print_warning('average fine tune model accuracy :',
                         mapping[loss]['average_accuracy'])
        cp.print_warning('summed fine tune model accuracy :', summed_acc)

    return num_model, mapping
Example #3
def evaluate_base_model(saved_model_dir):
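    """Evaluate the base model from every run in saved_model_dir and average its accuracy across runs."""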
    mapping = {}

    for loss in EXP_LOSS:
        cp.print_warning("loss function : ", loss)
        acc = []

        model_dir = os.path.join(saved_model_dir, loss)
        for i in tqdm(os.listdir(model_dir)):
            base_model_dir = os.path.join(saved_model_dir, loss, i,
                                          '{}_base'.format(EXP_TYPE))
            latest_model = max(os.listdir(base_model_dir))
            base_model = os.path.join(base_model_dir, latest_model,
                                      'model_best.pth')
            cp.print_warning("base model : ", base_model)

            if not torch.cuda.is_available():
                config = torch.load(base_model, map_location='cpu')['config']
            else:
                config = torch.load(base_model)['config']

            # if "media" not in config['data_loader']['args']['data_dir']:
            #     config['data_loader']['args']['data_dir'] = "/media/brandon/SSD" + config['data_loader']['args']['data_dir']

            config['metrics'] = ["pred_acc"]
            model, data_loader, loss_fn, metrics = evaluate.load_model(
                config, base_model, TARGET_CLASS)

            log = evaluate.evaluate(model, data_loader, loss_fn, metrics)

            acc.append(log['pred_acc'])

        mapping[loss] = {}
        mapping[loss]['average_accuracy'] = round(
            np.array(acc).mean() * 100, 2)
        mapping[loss]['raw_accuracy'] = acc

        cp.print_warning('average base model accuracy :',
                         mapping[loss]['average_accuracy'])
        cp.print_warning(mapping[loss]['raw_accuracy'])

    return len(os.listdir(model_dir)), mapping
Example #4
    def __init__(self, args, model_path):
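        # Build the id2label/label2id maps, pick a CUDA or CPU device, and load the tokenizer and model.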
        import warnings

        warnings.filterwarnings("ignore")

        processor = CnerProcessor()
        label_list = processor.get_labels()
        num_labels = len(label_list)

        args.id2label = {i: label for i, label in enumerate(label_list)}
        args.label2id = {label: i for i, label in enumerate(label_list)}

        self.args = args
        self.device = torch.device(
            'cuda:{}'.format(args.device)
            if torch.cuda.is_available() and args.device != '-1' else 'cpu')

        self.tokenizer = BertTokenizer.from_pretrained(args.bert_path)
        self.model = load_model(args=args,
                                num_labels=num_labels,
                                model_path=model_path).to(self.device)
Example #5
def evaluate_combined_model(saved_model_dir, num_iter, step_size):
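    """Evaluate combined base + fine-tuned models as target classes are added
    step_size at a time, over num_iter random orderings per run."""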
    mapping = {}

    for loss in EXP_LOSS:
        cp.print_warning("loss function : ", loss)
        summed_acc = [0] * (len(TARGET_CLASS) // step_size)

        model_dir = os.path.join(saved_model_dir, loss)
        for i in tqdm(os.listdir(model_dir)):
            base_model_dir = os.path.join(saved_model_dir, loss, i,
                                          '{}_base'.format(EXP_TYPE))
            fine_tuned_model_dir = os.path.join(
                saved_model_dir, loss, i, '{}_fine_tune'.format(EXP_TYPE))
            latest_model = max(os.listdir(base_model_dir))
            base_model = os.path.join(base_model_dir, latest_model,
                                      'model_best.pth')
            cp.print_warning("base model for combined model : ", base_model)
            cp.print_warning("fine tuned model model for combined model : ",
                             base_model)

            if not torch.cuda.is_available():
                config = torch.load(base_model, map_location='cpu')['config']
            else:
                config = torch.load(base_model)['config']

            # if "media" not in config['data_loader']['args']['data_dir']:
            #     config['data_loader']['args']['data_dir'] = "/media/brandon/SSD" + config['data_loader']['args']['data_dir']

            config['metrics'] = ["pred_acc"]

            for _ in range(num_iter):
                ordered_class = TARGET_CLASS.copy()
                random.shuffle(ordered_class)

                target_class = []

                index = 0

                while len(ordered_class) > 0:
                    target_class += ordered_class[:step_size]
                    ordered_class = ordered_class[step_size:]

                    print('target_class', target_class)

                    model, data_loader, loss_fn, metrics = evaluate.load_model(
                        config, base_model, target_class)

                    model = evaluate.combine_model(model, fine_tuned_model_dir,
                                                   target_class)

                    log = evaluate.evaluate(model, data_loader, loss_fn,
                                            metrics)

                    summed_acc[index] += log['pred_acc']
                    index += 1

        mapping[loss] = {}
        mapping[loss]['average_accuracy'] = []
        num_model = len(os.listdir(model_dir))

        for acc in summed_acc:
            mapping[loss]['average_accuracy'].append(
                round((acc / (num_model * num_iter)) * 100, 2))
        mapping[loss]['summed_accuracy'] = summed_acc

        cp.print_warning('average combined model accuracy :',
                         mapping[loss]['average_accuracy'])
        cp.print_warning('summed combined model accuracy :', summed_acc)

    return num_model, mapping
Example #6
from flask import render_template, request, flash, redirect, url_for
from webapp import app
from .forms import InputTextForm
import utils
import gensim
import evaluate
from text_manipulation import split_sentences

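# The word2vec vectors (skipped in test mode) and the model are loaded once at import time.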
if utils.config['test']:
    word2vec = None
else:
    word2vec = gensim.models.KeyedVectors.load_word2vec_format(
        utils.config['word2vecfile'], binary=True)

model = evaluate.load_model()


def treat_text(raw_text):
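    # Split the raw text into sentences and group them into segments at the predicted cutoffs.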
    sentences = split_sentences(raw_text, 123)
    print(sentences)

    cutoffs = evaluate.predict_cutoffs(sentences, model, word2vec)
    total = []
    segment = []
    for sentence, cutoff in zip(sentences, cutoffs):
        segment.append(sentence)
        if cutoff:
            total.append(segment)
            segment = []

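    # Note: any sentences after the last predicted cutoff are not added to `total`.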
    return total
Example #7
from utils.CheckpointUtils import load_checkpoint
from utils.ImageTransormation import transform
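# load_vocabulary, load_model and caption_image are assumed to be provided by
# other project modules not shown in this snippet.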


def caption(image_path):
    # Transform the image, add a batch dimension, and print the predicted caption.
    image = transform(image_path).unsqueeze(0)
    predicted_caption = caption_image(image, model, vocabulary)
    print(predicted_caption)


if __name__ == "__main__":
    checkpoint = load_checkpoint("coco.big.tar", 'cpu')

    vocabulary = load_vocabulary(checkpoint)
    model = load_model(checkpoint, vocabulary)
    model.eval()

    # Good
    caption("data/test2017/000000000016.jpg")
    caption("data/test2017/000000000057.jpg")
    caption("data/val2017/000000226662.jpg")
    caption("data/val2017/000000006763.jpg")

    # Semi Error
    caption("data/test2017/000000000019.jpg")
    caption("data/val2017/000000255965.jpg")
    caption("data/val2017/000000561256.jpg")
    caption("data/val2017/000000558073.jpg")
    caption("data/val2017/000000153217.jpg")
    caption("data/val2017/000000069213.jpg")
Example #8
def main(train, generate, orchestrate):
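    # train: build or load the targets metadata, train a model and save it;
    # orchestrate: run a trained model on a user-supplied target sound.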
    print('')

    if train:
        # generate targets and store metadata
        if generate:
            targetsJSON = generate_targets()
        else:
            try:
                # load targets metadata, closing the file handle when done
                with open(os.path.join(os.getcwd(),
                                       'targets/metadata.json'), 'r') as f:
                    targetsJSON = json.load(f)
            except Exception:
                targetsJSON = generate_targets()
            # if the project settings and the loaded metadata don't align, regenerate;
            # this check only matters when the metadata was loaded from disk
            if (targetsJSON['SAMPLES_PER_TARGET'] != SAMPLES_PER_TARGET
                    or targetsJSON['NUM_OF_TARGETS'] < NUM_OF_TARGETS
                    or targetsJSON['SAMPLE_RATE'] != SAMPLE_RATE):
                targetsJSON = generate_targets()
            # trim the targets metadata to NUM_OF_TARGETS entries
            if targetsJSON['NUM_OF_TARGETS'] != NUM_OF_TARGETS:
                targetsJSON['targets'] = targetsJSON[
                    'targets'][:NUM_OF_TARGETS]

        # format to torch datasets
        print('Preprocessing dataset... 📝')
        size_of_training_set = round(NUM_OF_TARGETS * 0.7)
        train_dataset = TargetsDataset(
            targetsJSON['targets'][:size_of_training_set],
            len(targetsJSON['all_labels']))
        test_dataset = TargetsDataset(
            targetsJSON['targets'][size_of_training_set:],
            len(targetsJSON['all_labels']))
        print('Dataset loaded! 🗄')

        # train the model
        final_model, accuracy = train_model(train_dataset, test_dataset,
                                            len(targetsJSON['all_labels']))

        # save model and settings
        export_path = os.path.join(
            os.getcwd(),
            f'models/model_{datetime.now().strftime("%d%m%y_%H%M")}')
        torch.save(final_model.state_dict(), f'{export_path}.pth')
        train_settings = export_settings()
        train_settings['Final Accuracy'] = accuracy
        train_settings['all_labels'] = targetsJSON['all_labels']
        with open(f'{export_path}.json', 'w') as json_file:
            json.dump(train_settings, json_file)
        print('Model saved! 📁')

    # orchestrate a user-defined sample
    if orchestrate:
        try:
            # use the model just trained (only defined when training ran above)
            eval_model = final_model.eval()
            eval_settings = train_settings
        except NameError:
            # or load an existing one from disk
            eval_model, eval_settings = load_model()

        # get the filepath and evaluate ([1:-1] presumably strips quotes around a pasted path)
        custom_target = click.prompt(
            'What is the filepath to the target sound?', type=str)[1:-1]
        orchestrate_target(eval_model, eval_settings, custom_target)

    print('')
Example #9
def resultBtn_click():
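    # Read the inputs from the UI widgets, compute the text features, echo them,
    # and run the loaded model on a feature vector.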
    Vectortext.delete('1.0', END)
    textRes.delete('1.0', END)

    images = comboImages.get()
    videos = comboVideos.get()
    links = comboLinks.get()
    selfLinks = comboSelfLinks.get()

    dayVector, isWeekend = getDayArray(comboDay.current())
    # thmz = text.get("1.0",END)

    file_content = str(textContent.get("1.0", 'end-1c'))
    content = [word.strip(string.punctuation) for word in file_content.split()]
    while ("" in content):
        content.remove("")

    file_title = str(textTitle.get("1.0", 'end-1c'))
    title = [word.strip(string.punctuation) for word in file_title.split()]
    while ("" in title):
        title.remove("")

    numLinks = links  #Number of Links
    numSelfLinks = selfLinks  #Number of Self Links
    numVideos = videos  #Number of Videos
    numImages = images  #Number of Images

    countwordsT = countWords(title)  #Number of Words Title
    countwordsC = countWords(content)  #Number of Words Content
    countunique = countUnique(content)  #Number of Unique Words
    nonstopCount = nonStopCount(content)  # Number of non-Stop Words
    #print(nonstopCount)

    rateNonStopWords = 0.999999995192  # Rate of non-Stop Words (hardcoded constant)
    rateUniqueNonStopWords = nonStopCount(
        uniqueWords(content)) / nonstopCount  # Rate of Unique non-Stop Words

    average_token_length = averageWordLength(content)  # Average Words Length

    global_subjectivity = TextBlob(' '.join(content)).subjectivity
    title_subjectivity = TextBlob(' '.join(title)).subjectivity
    global_sentiment_polarity = TextBlob(' '.join(content)).polarity
    title_sentiment_polarity = TextBlob(' '.join(title)).polarity

    #LDA = lda('content.txt')

    print('n_tokens_title =', countwordsT)
    print('n_tokens_content =', countwordsC)

    print('n_unique_tokens =', countunique)
    print('n_non_stop_words =', rateNonStopWords)
    print('n_non_stop_unique_tokens =', rateUniqueNonStopWords)

    print('num_href =', numLinks)
    print('num_self_href =', numSelfLinks)
    print('num_imgs =', numImages)
    print('num_videos =', numVideos)
    print('average_token_length =', average_token_length)

    num_keywords = num_keyword(" ".join(title))
    print('num_keywords =', num_keywords)

    print('weekday_is_monday =', dayVector[0])
    print('weekday_is_tuesday =', dayVector[1])
    print('weekday_is_wednesday =', dayVector[2])
    print('weekday_is_thursday =', dayVector[3])
    print('weekday_is_friday =', dayVector[4])
    print('weekday_is_saturday =', dayVector[5])
    print('weekday_is_sunday =', dayVector[6])
    print('is_weekend =', isWeekend)

    #    print('LDA00 =',LDA[0][1])
    #    print('LDA01 =',LDA[1][1])
    #    print('LDA02 =',LDA[2][1])
    #    print('LDA03 =',LDA[3][1])
    #    print('LDA04 =',LDA[4][1])

    print('global_subjectivity =', global_subjectivity)
    print('global_sentiment_polarity=', global_sentiment_polarity)

    (global_rate_positive_words, global_rate_negative_words,
     rate_positive_words, rate_negative_words, avg_positive_polarity,
     min_positive_polarity, max_positive_polarity, avg_negative_polarity,
     min_negative_polarity, max_negative_polarity) = PosNegInfo(content)

    abs_title_sub = abs_title_subjectivity(title_subjectivity)
    abs_title_sentiment_polarity = abs(title_sentiment_polarity)

    print('global_rate_positive_words =', global_rate_positive_words)
    print('global_rate_negative_words=', global_rate_negative_words)

    print('rate_positive_words=', rate_positive_words)
    print('rate_negative_words=', rate_negative_words)

    print('avg_positive_polarity=', avg_positive_polarity)
    print('min_positive_polarity=', min_positive_polarity)
    print('max_positive_polarity=', max_positive_polarity)

    print('avg_negative_polarity=', avg_negative_polarity)
    print('min_negative_polarity=', min_negative_polarity)
    print('max_negative_polarity=', max_negative_polarity)

    print('title_subjectivity=', title_subjectivity)
    print('title_sentiment_polarity=', title_sentiment_polarity)
    print('abs_title_subjectivity=', abs_title_sub)
    print('abs_title_sentiment_polarity=', abs_title_sentiment_polarity)

    print(type(numLinks))

    # Assemble the 40-element feature vector.
    vectorX = [
        countwordsT, countwordsC, countunique / countwordsC, rateNonStopWords,
        rateUniqueNonStopWords, numLinks, numSelfLinks, numImages, numVideos,
        average_token_length, num_keywords,
        dayVector[0], dayVector[1], dayVector[2], dayVector[3], dayVector[4],
        dayVector[5], dayVector[6], isWeekend,
        # hardcoded placeholders, apparently the five LDA topic scores
        # (the lda() call above is commented out)
        0.437373579, 0.200363493, 0.033456789, 0.033403472, 0.295402666,
        global_subjectivity, global_sentiment_polarity,
        global_rate_positive_words, global_rate_negative_words,
        rate_positive_words, rate_negative_words, avg_positive_polarity,
        min_positive_polarity, max_positive_polarity, avg_negative_polarity,
        min_negative_polarity, max_negative_polarity, title_subjectivity,
        title_sentiment_polarity, abs_title_sub, abs_title_sentiment_polarity
    ]

    # Space-separated feature string shown in the UI; the LDA topic values
    # (still commented out above) are not included.
    vectorStr = ' '.join(
        str(v) for v in [
            countwordsT, countwordsC, countunique / countwordsC,
            rateNonStopWords, rateUniqueNonStopWords, numLinks, numSelfLinks,
            numImages, numVideos, average_token_length, num_keywords,
            dayVector[0], dayVector[1], dayVector[2], dayVector[3],
            dayVector[4], dayVector[5], dayVector[6], isWeekend,
            global_subjectivity, global_sentiment_polarity,
            global_rate_positive_words, global_rate_negative_words,
            rate_positive_words, rate_negative_words, avg_positive_polarity,
            min_positive_polarity, max_positive_polarity,
            avg_negative_polarity, min_negative_polarity,
            max_negative_polarity, title_subjectivity,
            title_sentiment_polarity, abs_title_sub,
            abs_title_sentiment_polarity
        ])

    Vectortext.insert(END, vectorStr)

    print(vectorX)

    model = load_model("1")
    dataframe = pd.read_csv('baza.csv')
    dataset = dataframe.values
    X = dataset[:, 0:40].astype(float)
    std_scale = preprocessing.StandardScaler().fit(X)
    # reshape returns a new array, so keep the result before scaling the feature vector
    arrayVector = np.asarray(vectorX).reshape(1, -1)
    vector_std = std_scale.transform(arrayVector)

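    # Note: the prediction below runs on the hardcoded example vector Xnew, not on vector_std.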
    Xnew = array([[
        16, 143, 0.706293701, 0.999999988, 0.891566254, 2, 1, 0, 1, 4.20979021,
        6, 0, 0, 0, 1, 0, 0, 0, 0, 0.865611392, 0.033610515, 0.033395503,
        0.034036846, 0.033345744, 0.478333333, -0.021666667, 0.027972028,
        0.027972028, 0.5, 0.5, 0.4125, 0.2, 0.8, -0.466666667, -0.7,
        -0.166666667, 0.55, -0.25, 0.05, 0.2
    ]])
    ynew = model.predict_classes(Xnew)
    print(ynew)
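    # The displayed category is hardcoded (numToCategory(2)) rather than taken from ynew.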
    textRes.insert(END, numToCategory(2))