Example #1
0
    def setUpClass(cls):
        """Prepare one temporary working directory per (C file, function) case.

        For every entry of ``c_files_list`` the test source is preprocessed
        twice (with ``-DACTUAL`` and with ``-UACTUAL``), reduced to the
        function under test, and written as ``actual.c`` / ``base.c``.  The
        ``actual`` variant is compiled to ``target.o`` and the compile script
        is copied next to it.  The ``TemporaryDirectory`` objects are stored
        in ``cls.tmp_dirs`` keyed by ``(test_c, test_fn)``.
        """
        compiler = Compiler("test/compile.sh")
        cls.tmp_dirs = {}
        for test_c, test_fn in c_files_list:
            workdir = tempfile.TemporaryDirectory()
            source_path = os.path.join("test", test_c)
            actual_path, base_path, target_path = (
                os.path.join(workdir.name, leaf)
                for leaf in ("actual.c", "base.c", "target.o"))

            # Both variants are preprocessed before either one is written.
            preprocessed_actual = preprocess(source_path,
                                             cpp_args=["-DACTUAL"])
            preprocessed_base = preprocess(source_path, cpp_args=["-UACTUAL"])

            strip_other_fns_and_write(preprocessed_actual, test_fn,
                                      actual_path)
            strip_other_fns_and_write(preprocessed_base, test_fn, base_path)

            compiled = compiler.compile(Path(actual_path).read_text(),
                                        show_errors=True)
            assert compiled is not None
            # Copy-then-delete moves the object file into the work directory.
            shutil.copy2(compiled, target_path)
            os.remove(compiled)

            shutil.copy2("test/compile.sh", workdir.name)
            cls.tmp_dirs[(test_c, test_fn)] = workdir
Example #2
0
def main():
    """Run the whole fine-tuning pipeline: preprocess, load, train.

    The training dataset class is selected from ``cfg('encoding')``:
    ``'LBL'``, ``'blocked'`` and ``'text'`` map to a single dataset, while
    values starting with ``'inter'`` combine the ``train`` and ``dirty``
    files using the loader named by the suffix (``LBL`` or ``blocked``).

    Raises:
        ValueError: if the configured encoding is not one of the supported
            values, including an ``inter*`` value with an unknown suffix.
    """
    print("PREPROCESSING DATA")
    preprocess()
    print("LOADING TOKENIZER")
    tokenizer = get_tokenizer()
    data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer,
                                                    mlm=False)
    print("LOADING MODEL", cfg('model'))
    model = get_model(tokenizer)

    print("LOADING DATA")
    encoding = cfg('encoding')
    if encoding == 'LBL':
        train_dataset = LBLDataset(tokenizer=tokenizer,
                                   file_path=filename('train'))
    elif encoding == 'blocked':
        train_dataset = BlockedDataset(tokenizer=tokenizer,
                                       file_path=filename('train'))
    elif encoding == 'text':
        train_dataset = TextDataset(tokenizer=tokenizer,
                                    file_path=filename('train'),
                                    block_size=cfg('max_block'))
    elif encoding.startswith('inter'):
        if encoding.endswith('LBL'):
            loader = LBLDataset
        elif encoding.endswith('blocked'):
            loader = BlockedDataset
        else:
            # Previously `loader` stayed unbound here and the next line
            # failed with UnboundLocalError instead of a clear error.
            raise ValueError("Unkown encoding")

        d1 = loader(tokenizer=tokenizer, file_path=filename('train'))
        d2 = loader(tokenizer=tokenizer, file_path=filename('dirty'))
        train_dataset = CombinedDataset(d1, d2)
    else:
        raise ValueError("Unkown encoding")

    trainer = get_trainer(train_dataset, data_collator, model)

    def validator(x, y):
        """Validate, checkpoint the model and append metrics/predictions."""
        # `session` is bound further down in main(); the closure only reads
        # it when called, which happens during trainer.train().
        global BEST_metric
        model.save_pretrained(session)
        metric, pred = validate(model, tokenizer, x, y)
        if np.mean(metric) > BEST_metric:
            print("NEW BEST (saving)")
            BEST_metric = np.mean(metric)

        # save predictions and model
        save(session + "metric.txt", str(metric) + "\n")
        save(session + "pred.txt", str(pred) + "\n\n")
        return metric, pred

    trainer.validator = validator
    trainer.val_dataset = get_validation_data()

    # saving configuration
    print("SAVING...")
    session = get_session_path()
    print(session)
    save(session + "conf.txt", repr(cfg()))

    print("STARTING TRAINING...")
    trainer.train()
Example #3
0
 def prepare_data(self) -> None:
     """Preprocess the train/val/test files and wrap each in a dataset.

     All three splits are preprocessed against the same ontology file, then
     stored as ``self.train_dataset``, ``self.val_dataset`` and
     ``self.test_dataset``.
     """
     hp = self.hparams
     split_paths = (hp.train_path, hp.val_path, hp.test_path)
     # Preprocess every split first, build the datasets afterwards.
     train_turns, val_turns, test_turns = [
         preprocess(path, hp.ontology_path) for path in split_paths
     ]
     self.train_dataset = MultiWozDSTDataset(train_turns, self.tokenizer)
     self.val_dataset = MultiWozDSTDataset(val_turns, self.tokenizer)
     self.test_dataset = MultiWozDSTDataset(test_turns, self.tokenizer)
Example #4
0
    def __init__(self, hparams: Namespace):
        """Build the loss, tokenizer, embeddings and value decoder.

        The tokenizer is loaded from ``hparams.tokenizer_path`` when that
        path exists; otherwise the training turns are preprocessed and
        passed to ``get_tokenizer`` together with that path.
        """
        super().__init__()
        self.hparams = hparams
        hp = self.hparams

        self.criterion = nn.CTCLoss()

        if not Path(hp.tokenizer_path).exists():
            turns = preprocess(hp.train_path, hp.ontology_path)
            self.tokenizer = get_tokenizer(turns, hp.tokenizer_path)
        else:
            self.tokenizer = WordLevelTokenizer(hp.tokenizer_path)

        # Token and positional embeddings.
        self.embedding = nn.Embedding(self.tokenizer.get_vocab_size(),
                                      hp.embedding_dim)
        self.pos_embedding = PositionalEncoding(d_model=hp.embedding_dim,
                                                dropout=hp.dropout)

        # Value decoder: three attention layers and a projection to the
        # vocabulary size.
        attention_layers = []
        for _ in range(3):
            attention_layers.append(
                nn.MultiheadAttention(embed_dim=hp.hidden_dim,
                                      num_heads=hp.num_heads,
                                      dropout=hp.dropout))
        self.value_decoder = nn.ModuleList(attention_layers)
        self.vocab_proj = nn.Linear(hp.hidden_dim,
                                    self.tokenizer.get_vocab_size())
def test_preprocess():
    """Check ``preprocess`` on a small synthetic sentinel2-style cube."""
    import numpy as np
    import pandas as pd
    from xarray.core.dataset import Dataset
    from src.preprocess import preprocess

    n_steps = 5
    side = 100
    dims = ["time", 'lat', 'lon']

    # Two random bands plus an all-zero CLP layer.
    band_03 = np.random.rand(n_steps, side, side)
    band_08 = np.random.rand(n_steps, side, side)
    clp_layer = np.zeros((n_steps, side, side))

    # Every coordinate along an axis carries the same constant value.
    longitudes = [-99.83] * side
    latitudes = [42.25] * side

    data_vars = {
        'B03': (dims, band_03),
        'B08': (dims, band_08),
        'CLP': (dims, clp_layer),
    }
    coords = {
        'lon': longitudes,
        'lat': latitudes,
        'time': pd.date_range('2014-09-06', periods=n_steps),
        'reference_time': pd.Timestamp('2014-09-05'),
    }
    cube = Dataset(data_vars, coords=coords)

    cube, background_ndwi = preprocess(cube,
                                       max_cloud_proba=0.1,
                                       nans_how='any',
                                       verbose=1,
                                       plot_NDWI=False)

    assert hasattr(cube, "NDWI")
    assert background_ndwi.name == "NDWI"
    assert background_ndwi.data.shape == (100, 100)
def train(args):
    """Train the final random-forest model on the whole corpus and save it.

    Steps: preprocess ``CORPUS_FILES`` without a test split, rebuild the
    word2vec and tf-idf models, compute the training matrix, fit a
    ``RandomForestClassifier`` and dump it with ``joblib``.

    Args:
        args: parsed command-line arguments; ``args.min_count`` and
            ``args.feature_type`` are read.
    """
    logger.info('train final model, min_count=%s, extract_method=%s',
                args.min_count, args.feature_type)

    # preprocess all data
    preprocess.preprocess(CORPUS_FILES,
                          skip_viTokenizer=False,
                          export_pos=False,
                          test_size=0.,
                          is_final=True)

    # build word2vec model
    w2v.train_model(min_count=args.min_count,
                    use_external_data=False,
                    is_final=True)

    # build tfidf model
    tfidf.train_tfidf(min_count=args.min_count, is_final=True)

    X_train, y_train, _, _ = compute_train_test_matrix(args.min_count,
                                                       args.feature_type,
                                                       is_final=True)

    params = {
        'criterion': 'gini',
        'min_samples_split': 5,
        'n_estimators': 200,
        'min_samples_leaf': 1,
        'max_depth': 110,
        # 'auto' meant sqrt(n_features) for classifiers; it is deprecated
        # and later removed in scikit-learn, so use the explicit spelling.
        'max_features': 'sqrt',
        'bootstrap': False,
    }
    clf = RandomForestClassifier(**params,
                                 random_state=42,
                                 verbose=2,
                                 n_jobs=-1)
    clf.fit(X_train, y_train)
    joblib.dump(clf, helper.get_model_path('RandomForest', is_final=True))
Example #7
0
def test(data_dir):
    """Evaluate the final model on the processed test set and print averages.

    ``data_dir`` is preprocessed first.  Then, for every ``x_*`` file in
    ``constants.TEST_PROCESSED_DIR``, the matching ground-truth file is
    loaded, predictions are inferred, and note-level and frame-level
    precision/recall/F1 are collected.  The per-file scores are averaged and
    printed as ``[precision, recall, f1]``.

    Args:
        data_dir: directory handed to ``preprocess.preprocess``.
    """
    preprocess.preprocess(data_dir, False)
    model = '../bin/models/final_model.h5'
    test_generator = datagen.DataGenerator(constants.TEST_PROCESSED_DIR, constants.SEQUENCE_SIZE, constants.BATCH_SIZE, constants.CONTEXT_WINDOW_SIZE, 'x_[0-9]+.npy', 'y_[0-9]+.npy')

    # Each entry is a (precision, recall, f1) triple for one sample file.
    note_scores = []
    frame_scores = []

    for sample in glob.glob(os.path.join(constants.TEST_PROCESSED_DIR, 'x_*')):
        # basename() also copes with platform-specific path separators,
        # unlike splitting on '/'.
        isolated_filename = os.path.basename(sample)

        gt_file = os.path.join(constants.TEST_PROCESSED_DIR,
                               test_generator.corresponding_y(isolated_filename))
        references = np.load(gt_file, mmap_mode='r')
        predictions = infer_from_processed(model, sample)

        note_scores.append(get_note_evaluation(references, predictions))
        frame_scores.append(get_f1_score_frames(references, predictions))

    if not note_scores:
        # np.mean([]) would only yield nan plus a RuntimeWarning.
        print('No processed test samples found in',
              constants.TEST_PROCESSED_DIR)
        return

    avg_frame_precision, avg_frame_recall, avg_frame_f1 = (
        np.mean(column) for column in zip(*frame_scores))
    avg_note_precision, avg_note_recall, avg_note_f1 = (
        np.mean(column) for column in zip(*note_scores))

    print('Frame:')
    print([avg_frame_precision, avg_frame_recall, avg_frame_f1])
    print('Note:')
    print([avg_note_precision, avg_note_recall, avg_note_f1])
def main(bool_dict):
    """
    Run the project pipeline from data download to evaluation.

    The raw data file is always downloaded.  The remaining steps run in the
    order preprocess, linear-regression training, evaluation, and each one
    is executed only when its flag is truthy.

    :param bool_dict: Dictionary mapping the step names ``"preprocess"``,
    ``"lin_reg_train"`` and ``"evaluate"`` to booleans; a false value skips
    that step, e.g. to avoid re-training an already trained model.
    :return:
    """
    raw_csv_path = os.path.join(files.RAW_DATA, files.GDP_ENERGY_DATA_CSV)
    download_file_from_url(files.GDP_ENERGY_DATA_URL, raw_csv_path)

    pipeline = (
        ("preprocess", preprocess),
        ("lin_reg_train", lin_reg_train),
        ("evaluate", evaluate),
    )
    for step_name, run_step in pipeline:
        if bool_dict[step_name]:
            run_step()
def sentiment_classification():
    """Run multi-label boardgame classification given boardgame description.
        ---
        parameters:
          - name: body
            in: body
            schema:
              id: description
              required:
                - description
              properties:
                description:
                  type: [string]
            description: the required boardgame description for POST method
            required: true
        definitions:
          SentimentResponse:
          Project:
            properties:
              status:
                type: string
              ml-result:
                type: object
        responses:
          40x:
            description: Client error
          200:
            description: Multi-label Boardgame Categorization
            examples:
                          [
{
  "status": "success",
  "sentiment": "1"
},
{
  "status": "error",
  "message": "Exception caught"
},
]
        """
    # NOTE(review): the docstring above looks like an API spec that may be
    # parsed at runtime, so it is deliberately left untouched - confirm.
    json_request = request.get_json()  # body of the incoming request
    if not json_request:
        # Nothing (or an empty payload) was sent.
        return Response("No json provided.", status=400)

    # Use .get() on a mapping so a missing "description" key produces the
    # intended 400 response instead of an unhandled KeyError (HTTP 500).
    description = (json_request.get('description')
                   if isinstance(json_request, dict) else None)
    if description is None:
        return Response("No text provided.", status=400)

    preprocessed_description = preprocess(description)
    predicted_categories = predict(preprocessed_description)
    # jsonify builds the JSON response with HTTP status 200.
    return flask.jsonify({"status": "success", "predicted_categories": predicted_categories.tolist()})
def test(opt):
    """Run a saved model over the test set and pickle the predicted classes.

    The checkpoint ``./models/VdcnnIR_[C11_]<opt.model>`` is loaded (the
    ``C11_`` variant when both ``opt.conv1_1`` and ``opt.model`` are truthy),
    every batch of the test loader is classified, and the predictions are
    pickled to ``./results/model_<opt.model>_[C11_]Testpred``.

    Args:
        opt: options object; ``test``, ``batchsize``, ``imagesize``,
            ``model`` and ``conv1_1`` are read.

    Returns:
        tuple: ``(images, total_testpredictions)`` - the image file names
        taken from the loader's data source and the predicted class index
        for each test sample.
    """
    testdata, testGenerator, classes = preprocess(path='./data' + os.sep +
                                                  opt.test,
                                                  batchsize=opt.batchsize,
                                                  imagesize=opt.imagesize,
                                                  shuffle=False)
    # basename() instead of split('/') so other path separators work too.
    images = [
        os.path.basename(entry[0])
        for entry in testGenerator.sampler.data_source.imgs
    ]
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print(next(iter(testGenerator))[0].size())

    # The original built a fresh Vgg here and immediately replaced it with
    # the loaded checkpoint; only the load is kept.
    variant = 'C11_' if opt.conv1_1 and opt.model else ''
    # map_location lets a checkpoint saved on GPU load on a CPU-only host.
    # NOTE(review): this unpickles a whole model object; only load
    # checkpoints from a trusted source.
    model = torch.load('./models' + os.sep + 'VdcnnIR_' + variant +
                       str(opt.model),
                       map_location=device)

    model.eval()
    total_testpredictions = []
    for idx_t, data_t in enumerate(testGenerator):
        data_t = data_t[0].to(device)  # labels in data_t[1] are not needed
        with torch.no_grad():
            prob_t = model(data_t)
            pred_t = np.argmax(prob_t.detach().cpu(), -1)
            total_testpredictions.extend(pred_t.tolist())
            print('Iter: [{}/{}]'.format(idx_t + 1, len(testGenerator)))

    result_path = './results/model_{}_{}Testpred'.format(opt.model, variant)
    with open(result_path, 'wb') as f:
        pickle.dump(total_testpredictions, f)
    return images, total_testpredictions
Example #11
0
def main():
    """Fine-tune the BERT predictor and report train/validation accuracy.

    The last ``valid_dataset_size`` examples of the loaded dataset are held
    out for validation and the rest are used for training.  The model is
    trained for ten epochs with SGD, printing a running training accuracy
    and the validation accuracy after each epoch.  Finally the fraction of
    validation targets equal to ``JUN`` is printed.
    """
    # omoshiro_tweets = load_omoshiro_tweets_from_json(tweet_filename)
    valid_dataset_size = 128
    batch_size = 8
    config_path = 'cl-tohoku/bert-base-japanese-whole-word-masking'

    if torch.cuda.is_available():
        device = torch.device('cuda')
    else:
        device = torch.device('cpu')

    tokenizer = BertJapaneseTokenizer.from_pretrained(config_path)
    config = BertConfig.from_pretrained(config_path)

    pad_token_id = config.pad_token_id

    dataset = load_dataset(dataset_filename)

    # Split with the same constant that later divides the validation
    # accuracy; the original hard-coded 128 here, so the two could drift.
    train_dataset = dataset[:-valid_dataset_size]
    valid_dataset = dataset[-valid_dataset_size:]

    train_dataset, train_max_length = preprocess(train_dataset, tokenizer, config, batch_size=batch_size, device=device)
    train_dataset = normalize_dataset(train_dataset)
    valid_dataset, valid_max_length = preprocess(valid_dataset, tokenizer, config, batch_size=batch_size, device=device)

    valid_batches = mk_batches(dataset=valid_dataset, max_length=valid_max_length, batch_size=batch_size, device=device, pad=pad_token_id)

    print('Train dataset size is {}, Valid dataset size is {}'.format(len(train_dataset), len(valid_dataset)))

    model = BertPredictor(config_path=config_path, model_path=config_path)
    # model = Perceptron(vocab_size=tokenizer.vocab_size, hidden_size=128, device=device)

    model.to(device)

    criterion = torch.nn.CrossEntropyLoss(ignore_index=NEUTRAL)

    optimizer = optim.SGD(model.parameters(), lr=0.0001)

    for epoch in range(10):
        print('------ Epoch {} ------'.format(epoch + 1))

        # Training batches are rebuilt at the start of every epoch.
        train_batches = mk_batches(dataset=train_dataset, max_length=train_max_length, batch_size=batch_size, device=device, pad=pad_token_id)

        print('Train')
        model.train()
        accuracy = 0.0
        for i, batch in enumerate(train_batches):
            model.zero_grad()

            src = batch['src']
            tgt = batch['tgt']

            # output = [batch_size, vocab_size]
            output = model(src)

            loss = criterion(output, tgt)

            labels = torch.argmax(output, dim=-1)

            # Running mean over the examples seen so far; the formula counts
            # batch_size examples for every batch.
            accuracy = ((labels == tgt).sum() + accuracy * i * batch_size) / ((i + 1) * batch_size)

            loss.backward()
            optimizer.step()

            sys.stdout.write('\rLoss: {},  Accuracy: {}'.format(loss.item(), accuracy))

        # accuracy /= len(train_dataset)

        print('\nTrain accuracy {}'.format(accuracy))

        print('Validation')
        model.eval()
        with torch.no_grad():
            accuracy = 0.0
            for batch in valid_batches:
                src = batch['src']
                tgt = batch['tgt']

                output = model(src)

                labels = torch.argmax(output, dim=-1)

                accuracy += (labels == tgt).sum()

            accuracy /= valid_dataset_size
            print('Valid accuracy : {}'.format(accuracy))

    # Fraction of validation targets equal to JUN.
    accuracy = 0.0
    for batch in valid_batches:
        accuracy += (JUN == batch['tgt']).sum()

    accuracy /= valid_dataset_size

    print('== JUN accuracy : {}'.format(accuracy))
Example #12
0
 def predict(self, model_input):
     """Preprocess the input, predict with the classifier, postprocess."""
     features = preprocess(model_input)
     raw_prediction = self.knn_clf.predict(features)
     return postprocess(raw_prediction)
Example #13
0
 def fit(self, train_x, train_y):
     """Fit the classifier on the preprocessed training inputs."""
     self.knn_clf.fit(preprocess(train_x), train_y)
Example #14
0
    type=str,
    default='test',
    choices=['preprocess', 'train', 'test', 'predict', 'select'])
parser.add_argument('--gpu',
                    type=int,
                    default=0,
                    choices=list(range(8)))
parser.add_argument('--config', type=str, default='config.yaml')

args = parser.parse_args()

# Read the YAML configuration with a context manager so the file handle is
# closed deterministically (the original left it open).
with open(args.config, 'r', encoding='utf-8') as config_file:
    config = yaml.safe_load(config_file)
config['gpu'] = args.gpu

# Each task imports its implementation lazily, so only the selected task's
# module is loaded.
if args.task == 'preprocess':
    from src.preprocess import preprocess
    preprocess(config)
elif args.task == 'train':
    from src.train import train_language_model
    train_language_model(config)
elif args.task == 'test':
    from src.test import test_language_model
    test_language_model(config)
elif args.task == 'predict':
    from src.predict import predict
    predict(config)
elif args.task == 'select':
    from src.select import select
    select(config)
else:
    raise ValueError('argument --task error')
Example #15
0
def detectPlatesInScene(imgOriginalScene):
    """Find candidate licence plates in a scene image.

    The scene is preprocessed into grayscale/threshold images, possible
    characters are located, grouped into lists of matching characters, and a
    plate is extracted for each group.  When ``main.showSteps`` is set, the
    intermediate images are displayed and the function waits for key
    presses.

    Args:
        imgOriginalScene: image array whose ``shape`` unpacks into
            ``(height, width, channels)``.

    Returns:
        list: the extracted plates whose ``imgPlate`` is not ``None``.
    """
    listOfPossiblePlates = []

    height, width, _ = imgOriginalScene.shape

    # Debug canvas; only drawn on and shown when main.showSteps is set.
    imgContours = np.zeros((height, width, 3), np.uint8)

    cv2.destroyAllWindows()

    if main.showSteps:
        cv2.imshow("0", imgOriginalScene)

    imgGrayscaleScene, imgThreshScene = preprocess.preprocess(imgOriginalScene)

    if main.showSteps:
        cv2.imshow("1a", imgGrayscaleScene)
        cv2.imshow("1b", imgThreshScene)

    listOfPossibleCharsInScene = findPossibleCharsInScene(imgThreshScene)

    if main.showSteps:
        print("step 2 - len(listOfPossibleCharsInScene) = " + str(
            len(listOfPossibleCharsInScene)))

        imgContours = np.zeros((height, width, 3), np.uint8)

        contours = [
            possibleChar.contour for possibleChar in listOfPossibleCharsInScene
        ]

        cv2.drawContours(imgContours, contours, -1, main.SCALAR_WHITE)
        cv2.imshow("2b", imgContours)

    listOfListsOfMatchingCharsInScene = detect_chars.findListOfListsOfMatchingChars(listOfPossibleCharsInScene)

    if main.showSteps:
        print("step 3 - listOfListsOfMatchingCharsInScene.Count = " + str(
            len(listOfListsOfMatchingCharsInScene)))

        imgContours = np.zeros((height, width, 3), np.uint8)

        # Draw every group of matching characters in its own random colour.
        for listOfMatchingChars in listOfListsOfMatchingCharsInScene:
            intRandomBlue = random.randint(0, 255)
            intRandomGreen = random.randint(0, 255)
            intRandomRed = random.randint(0, 255)

            contours = [
                matchingChar.contour for matchingChar in listOfMatchingChars
            ]

            cv2.drawContours(imgContours, contours, -1, (intRandomBlue, intRandomGreen, intRandomRed))

        cv2.imshow("3", imgContours)

    for listOfMatchingChars in listOfListsOfMatchingCharsInScene:
        possiblePlate = extractPlate(imgOriginalScene, listOfMatchingChars)

        if possiblePlate.imgPlate is not None:
            listOfPossiblePlates.append(possiblePlate)

    print("\n" + str(len(listOfPossiblePlates)) + " possible plates found")

    if main.showSteps:
        print("\n")
        cv2.imshow("4a", imgContours)

        for i, plate in enumerate(listOfPossiblePlates):
            # boxPoints yields float coordinates; convert them to ints
            # before drawing, since current OpenCV bindings are reported to
            # reject float points in cv2.line.
            corners = [
                tuple(int(coordinate) for coordinate in point)
                for point in cv2.boxPoints(plate.rrLocationOfPlateInScene)
            ]

            # Outline the rotated rectangle edge by edge, closing the loop.
            for start in range(4):
                cv2.line(imgContours, corners[start],
                         corners[(start + 1) % 4], main.SCALAR_RED, 2)

            cv2.imshow("4a", imgContours)

            print("possible plate " + str(i) + ", click on any image and press a key to continue . . .")

            cv2.imshow("4b", plate.imgPlate)
            cv2.waitKey(0)

        print("\nplate detection complete, click on any image and press a key to begin char recognition . . .\n")
        cv2.waitKey(0)

    return listOfPossiblePlates
Example #16
0
def detectCharsInPlates(listOfPossiblePlates):
    """Recognise the characters on every candidate plate.

    For each plate the plate image is preprocessed, upscaled and
    re-thresholded, possible characters are found and grouped, and the
    longest group is passed to ``recognizeCharsInPlate``.  The result is
    stored in ``possiblePlate.strChars`` (an empty string when no group of
    matching characters is found).  When ``main.showSteps`` is true the
    intermediate images are shown and the function waits for key presses.

    Args:
        listOfPossiblePlates: candidate plates; each must provide
            ``imgPlate`` and is updated in place.

    Returns:
        The same list that was passed in.
    """
    intPlateCounter = 0
    imgContours = None
    contours = []

    if len(listOfPossiblePlates) == 0:  # nothing to recognise
        return listOfPossiblePlates  # hand the empty list straight back

    for possiblePlate in listOfPossiblePlates:  # one pass per candidate plate; this loop is most of the function

        possiblePlate.imgGrayscale, possiblePlate.imgThresh = preprocess.preprocess(
            possiblePlate.imgPlate
        )  # grayscale and threshold versions of the plate image

        if main.showSteps == True:  # debug display only
            cv2.imshow("5a", possiblePlate.imgPlate)
            cv2.imshow("5b", possiblePlate.imgGrayscale)
            cv2.imshow("5c", possiblePlate.imgThresh)

        # Enlarge the threshold image by a factor of 1.6 in both directions.
        possiblePlate.imgThresh = cv2.resize(possiblePlate.imgThresh, (0, 0),
                                             fx=1.6,
                                             fy=1.6)

        # Threshold the resized image again (binary + Otsu); the computed
        # threshold value itself is not used afterwards.
        thresholdValue, possiblePlate.imgThresh = cv2.threshold(
            possiblePlate.imgThresh, 0.0, 255.0,
            cv2.THRESH_BINARY | cv2.THRESH_OTSU)

        if main.showSteps == True:  # debug display only
            cv2.imshow("5d", possiblePlate.imgThresh)

        listOfPossibleCharsInPlate = findPossibleCharsInPlate(
            possiblePlate.imgGrayscale, possiblePlate.imgThresh)

        if main.showSteps == True:  # debug display only
            # height/width are only bound in this branch; the later debug
            # branches rely on main.showSteps not changing in between.
            height, width, numChannels = possiblePlate.imgPlate.shape
            imgContours = np.zeros((height, width, 3), np.uint8)
            del contours[:]  # clear the contours list

            for possibleChar in listOfPossibleCharsInPlate:
                contours.append(possibleChar.contour)

            cv2.drawContours(imgContours, contours, -1, main.SCALAR_WHITE)

            cv2.imshow("6", imgContours)

        listOfListsOfMatchingCharsInPlate = findListOfListsOfMatchingChars(
            listOfPossibleCharsInPlate)

        if main.showSteps == True:  # debug display only
            imgContours = np.zeros((height, width, 3), np.uint8)
            del contours[:]

            # NOTE(review): contours is not cleared per group here (unlike
            # the "8" display below), so each drawContours call also redraws
            # the earlier groups in the newest colour - confirm intent.
            for listOfMatchingChars in listOfListsOfMatchingCharsInPlate:
                intRandomBlue = random.randint(0, 255)
                intRandomGreen = random.randint(0, 255)
                intRandomRed = random.randint(0, 255)

                for matchingChar in listOfMatchingChars:
                    contours.append(matchingChar.contour)

                cv2.drawContours(imgContours, contours, -1,
                                 (intRandomBlue, intRandomGreen, intRandomRed))

            cv2.imshow("7", imgContours)

        if (len(listOfListsOfMatchingCharsInPlate) == 0):  # no group of matching chars on this plate

            if main.showSteps == True:
                print(
                    "chars found in plate number " + str(intPlateCounter) +
                    " = (none), click on any image and press a key to continue . . ."
                )
                intPlateCounter = intPlateCounter + 1
                cv2.destroyWindow("8")
                cv2.destroyWindow("9")
                cv2.destroyWindow("10")
                cv2.waitKey(0)

            possiblePlate.strChars = ""
            continue  # move on to the next plate

        # Within each group: sort chars by their centre x position, then
        # drop inner overlapping chars.
        for i in range(0, len(listOfListsOfMatchingCharsInPlate)):
            listOfListsOfMatchingCharsInPlate[i].sort(
                key=lambda matchingChar: matchingChar.intCenterX)
            listOfListsOfMatchingCharsInPlate[i] = removeInnerOverlappingChars(
                listOfListsOfMatchingCharsInPlate[i])

        if main.showSteps == True:  # debug display only
            imgContours = np.zeros((height, width, 3), np.uint8)

            for listOfMatchingChars in listOfListsOfMatchingCharsInPlate:
                intRandomBlue = random.randint(0, 255)
                intRandomGreen = random.randint(0, 255)
                intRandomRed = random.randint(0, 255)

                del contours[:]  # one colour per group

                for matchingChar in listOfMatchingChars:
                    contours.append(matchingChar.contour)

                cv2.drawContours(imgContours, contours, -1,
                                 (intRandomBlue, intRandomGreen, intRandomRed))

            cv2.imshow("8", imgContours)

        # Pick the group with the most chars; on a tie the first one wins
        # because the comparison is strict.
        intLenOfLongestListOfChars = 0
        intIndexOfLongestListOfChars = 0

        for i in range(0, len(listOfListsOfMatchingCharsInPlate)):
            if len(listOfListsOfMatchingCharsInPlate[i]
                   ) > intLenOfLongestListOfChars:
                intLenOfLongestListOfChars = len(
                    listOfListsOfMatchingCharsInPlate[i])
                intIndexOfLongestListOfChars = i

        longestListOfMatchingCharsInPlate = listOfListsOfMatchingCharsInPlate[
            intIndexOfLongestListOfChars]

        if main.showSteps == True:  # debug display only
            imgContours = np.zeros((height, width, 3), np.uint8)
            del contours[:]

            for matchingChar in longestListOfMatchingCharsInPlate:
                contours.append(matchingChar.contour)

            cv2.drawContours(imgContours, contours, -1, main.SCALAR_WHITE)

            cv2.imshow("9", imgContours)

        # Recognition runs on the longest group only.
        possiblePlate.strChars = recognizeCharsInPlate(
            possiblePlate.imgThresh, longestListOfMatchingCharsInPlate)

        if main.showSteps == True:  # debug display only
            print("chars found in plate number " + str(intPlateCounter) +
                  " = " + possiblePlate.strChars +
                  ", click on any image and press a key to continue . . .")
            intPlateCounter = intPlateCounter + 1
            cv2.waitKey(0)

    if main.showSteps == True:  # debug display only
        print(
            "\nchar detection complete, click on any image and press a key to continue . . .\n"
        )
        cv2.waitKey(0)

    return listOfPossiblePlates
Example #17
0
import cv2

# Save before/after images of one training sample for the write-up.
image = X_train[6837]
Image.fromarray(image).save("writeup/before.png")
yuv = (255 * preprocess_image(image)).astype('uint8')
Image.fromarray(yuv[:, :, 0]).save("writeup/y.png")
Image.fromarray(yuv[:, :, 1]).save("writeup/u.png")
Image.fromarray(yuv[:, :, 2]).save("writeup/v.png")
rgb = cv2.cvtColor(yuv, cv2.COLOR_YUV2RGB)
Image.fromarray(rgb).save("writeup/after.png")

# Export a handful of raw training images.
for i in np.arange(23205, 23210):
    im = Image.fromarray(X_train[i])
    im.save("writeup/yield/" + str(i) + ".png")

X_train, y_train = preprocess(X_train, y_train)
X_valid, y_valid = preprocess(X_valid, y_valid)

from sklearn.utils import shuffle
# shuffle() returns shuffled copies and leaves its arguments untouched, so
# the result must be assigned back; the original call discarded it and the
# data was never shuffled.
X_train, y_train = shuffle(X_train, y_train)

# Step 2B: Architecture
from src.architecture import LeNet

# Step 2C: Train the data

import tensorflow as tf
from sklearn.utils import shuffle

EPOCHS = 20
BATCH_SIZE = 4096
# Log to a file; configured at import time.
logging.basicConfig(level=logging.INFO,
                    filename='log/log_preprocess.log',
                    format='%(asctime)s : %(levelname)s : %(message)s')
parser = argparse.ArgumentParser()

if __name__ == '__main__':
    parser.add_argument('corpus',
                        help='choose type of corpus to preprocess',
                        choices=['def', 'ex', 'both'])
    parser.add_argument('-pos', '--export_pos',
                        help='export postagging',
                        action='store_true')
    parser.add_argument('-fn', '--is_final',
                        help='preprocess for final model (not split to train/test)',
                        action='store_true')

    args = parser.parse_args()
    selected = args.corpus

    # argparse restricts `corpus` to def/ex/both, so "not ex" means the
    # default corpus is wanted and "not def" means the external one is.
    if selected != 'ex':
        preprocess.preprocess(CORPUS_FILES,
                              skip_viTokenizer=False,
                              export_pos=args.export_pos,
                              is_final=args.is_final)
    if selected != 'def':
        preprocess.preprocess(EXTERNAL_CORPUS_FILES,
                              skip_viTokenizer=True,
                              export_pos=args.export_pos,
                              is_final=args.is_final)
def train(opt):
    """Train a ``Vgg`` model with early stopping on the validation loss.

    Per epoch the function runs one pass over the training batches (with
    optimisation) and one pass over the validation batches (without
    gradients), appends a summary line to a per-split text log, redraws the
    loss-curve figure, and saves the whole model whenever the validation loss
    improves. After the loop the loss histories are pickled.

    Artifacts are written under ``results/``, ``figures/`` and ``models/``
    (created if missing). File names carry a ``C11_`` marker when
    ``opt.conv1_1`` is truthy and ``opt.depth == 16``.

    Args:
        opt: Options object. Attributes read here: ``train`` and ``val``
            (sub-paths under ``./data``), ``batchsize``, ``imagesize``,
            ``conv1_1``, ``depth``, ``lr``, ``epochs`` and ``early_stopping``
            (number of consecutive non-improving epochs tolerated).

    Returns:
        The lowest per-sample validation loss observed, or ``None`` when
        ``opt.epochs`` is 0.
    """
    # preprocess() is used here as returning (dataset, batch iterable, classes):
    # len() is taken of the first, the second is iterated, and the third is
    # handed to the model as num_classes.
    traindata, trainGenerator, classes = preprocess(
        path='./data' + os.sep + opt.train,
        batchsize=opt.batchsize,
        imagesize=opt.imagesize,
        shuffle=True)
    valdata, validationGenerator, classes = preprocess(
        path='./data' + os.sep + opt.val,
        batchsize=opt.batchsize,
        imagesize=opt.imagesize,
        shuffle=True)

    # Peek at one batch: dimension 1 of the input tensor is used as the
    # channel count for the model.
    num_channels = next(iter(trainGenerator))[0].size(1)

    # The conv1_1 variant only applies to the 16-layer configuration; every
    # artifact name below is derived from this single decision.
    use_c11 = bool(opt.conv1_1) and opt.depth == 16
    tag = 'C11_{}'.format(opt.depth) if use_c11 else '{}'.format(opt.depth)
    path_t = 'results/VdcnnIR_train_{}.txt'.format(tag)
    path_v = 'results/VdcnnIR_val_{}.txt'.format(tag)
    model_path = 'models/VdcnnIR_{}'.format(tag)
    figure_path = 'figures/trainVal_loss_{}.png'.format(tag)
    losses_path = 'results/losses_{}'.format(tag)

    for out_dir in ('results', 'figures', 'models'):
        os.makedirs(out_dir, exist_ok=True)

    # Start each run with empty log files. Opening with 'w' creates or
    # truncates portably (os.mknod is not available on every platform).
    for log_path in (path_t, path_v):
        with open(log_path, 'w'):
            pass

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model = Vgg(num_channels=num_channels,
                num_classes=classes,
                depth=opt.depth,
                initialize_weights=True,
                conv1_1=opt.conv1_1 if use_c11 else False).to(device)
    optimizer = optim.Adam(model.parameters(), lr=opt.lr)
    criterion = nn.CrossEntropyLoss()

    def plot_fig(train_loss, val_loss):
        """Save the train/validation loss curves for the epochs so far."""
        fig = plt.figure(figsize=(10, 8))
        if use_c11:
            plt.title("{}_C11 layer model".format(opt.depth))
        else:
            plt.title("{} layer model".format(opt.depth))
        plt.plot(train_loss, label='Train_loss')
        plt.plot(val_loss, label='Val_loss')
        plt.xlabel("Epochs")
        plt.ylabel("Loss")
        plt.legend()
        plt.savefig(figure_path)
        # This runs once per epoch; without closing, every figure stays
        # alive in pyplot's registry for the whole run.
        plt.close(fig)
        return None

    def run_epoch(generator, num_samples, epoch, training):
        """Run one pass over ``generator``; return (mean loss, accuracy)."""
        model.train(training)
        weighted_losses = []
        predictions = []
        labels = []
        for idx, batch in enumerate(generator):
            inputs = batch[0].to(device)
            label = batch[1].to(device)
            if training:
                optimizer.zero_grad()
            # Gradients are only tracked for the training pass.
            with torch.set_grad_enabled(training):
                prob = model(inputs)
                loss = criterion(prob, label)
            if training:
                loss.backward()
                optimizer.step()

            batch_pred = prob.detach().argmax(dim=-1).cpu().numpy()
            batch_true = label.cpu().numpy()
            # Weight the batch-mean loss by batch size so the epoch figure
            # is a per-sample average even with a short final batch.
            weighted_losses.append(loss.item() * len(batch_true))
            predictions.extend(batch_pred.tolist())
            labels.extend(batch_true.tolist())
            print('Iter: [{}/{}]\t Epoch: [{}/{}]\t Loss: {}\t Acc: {}'.format(
                idx + 1, len(generator), epoch + 1, opt.epochs, loss.item(),
                metrics.accuracy_score(batch_true, batch_pred)))
        epoch_loss = sum(weighted_losses) / num_samples
        return epoch_loss, metrics.accuracy_score(labels, predictions)

    totalVal_loss = []
    totalTrain_loss = []
    count = 0  # consecutive epochs without validation improvement
    best_score = None

    for epoch in range(opt.epochs):
        loss_epoch, train_acc = run_epoch(trainGenerator, len(traindata),
                                          epoch, training=True)
        totalTrain_loss.append(loss_epoch)
        with open(path_t, 'a') as f:
            f.write('Epoch: {}\t Loss: {}\t Accuracy: {}\n'.format(
                epoch + 1, loss_epoch, train_acc))

        val_lossEpoch, val_acc = run_epoch(validationGenerator, len(valdata),
                                           epoch, training=False)
        totalVal_loss.append(val_lossEpoch)
        with open(path_v, 'a') as f:
            f.write('Epoch: {}\tLoss: {}\tAccuracy: {}\n'.format(
                epoch + 1, val_lossEpoch, val_acc))

        plot_fig(train_loss=totalTrain_loss, val_loss=totalVal_loss)

        if best_score is None or val_lossEpoch < best_score:
            if best_score is not None:
                print("Loss:{} decreased from {}. Saving model........".format(
                    val_lossEpoch, best_score))
            best_score = val_lossEpoch
            torch.save(model, model_path)
            count = 0
        else:
            # Anything that is not a strict improvement uses up patience.
            # That includes an unchanged loss and NaN, which a pair of
            # '>' / '<' tests would silently skip.
            print("Loss:{} doesn't decreased from {}".format(
                val_lossEpoch, best_score))
            count += 1
            if count >= opt.early_stopping:
                break

    losses = {'trainLoss': totalTrain_loss, 'valLoss': totalVal_loss}
    with open(losses_path, 'wb') as f:
        pickle.dump(losses, f)
    return best_score