def plot_spectrograms(data, config):
    """
        This function makes a plot of 4 random spectrograms and their respective histograms
        cube (np.array) the processed data loaded in train.py
    """
    fig, axs = plt.subplots(4, 7, figsize=(10, 10))

    # Get all the correct cubes
    p = preprocessor(data)
    p.get_magnitude_and_phase()
    mag, phase = (p.get_processed_cube()[..., 0:1],
                  p.get_processed_cube()[..., 1:2])

    p = preprocessor(data)
    p.interp(config['n_frequencies'], config['n_time_steps'])
    p.get_magnitude_and_phase()
    mag_interp, phase_interp = (p.get_processed_cube()[..., 0:1],
                                p.get_processed_cube()[..., 1:2])

    p = preprocessor(data)
    p.interp(config['n_frequencies'], config['n_time_steps'])
    p.get_magnitude()
    p.median_threshold()
    p.minmax(per_baseline=True,
             feature_range=(np.min(phase_interp), np.max(phase_interp)))
    mag_interp_thresh = p.get_processed_cube()

    for i in range(4):
        r = np.random.randint(0, data.shape[0])
        im = axs[i, 0].imshow(data[r, ..., 0])
        axs[i, 0].title.set_text('Real Component')
        plt.colorbar(im, ax=axs[i, 0])

        im = axs[i, 1].imshow(data[r, ..., 1])
        axs[i, 1].title.set_text('Imaginary Component')
        plt.colorbar(im, ax=axs[i, 1])

        im = axs[i, 2].imshow(mag[r, ..., 0])
        axs[i, 2].title.set_text('Magnitude Component')
        plt.colorbar(im, ax=axs[i, 2])

        im = axs[i, 3].imshow(phase[r, ..., 0])
        axs[i, 3].title.set_text('Phase Component')
        plt.colorbar(im, ax=axs[i, 3])

        im = axs[i, 4].imshow(mag_interp[r, ..., 0])
        axs[i, 4].title.set_text('Magnitude component interpolated')
        plt.colorbar(im, ax=axs[i, 4])

        im = axs[i, 5].imshow(phase_interp[r, ..., 0])
        axs[i, 5].title.set_text('Phase component interpolated')
        plt.colorbar(im, ax=axs[i, 5])

        im = axs[i, 6].imshow(mag_interp_thresh[r, ..., 0])
        axs[i, 6].title.set_text(
            'Magnitude component interpolated and thresholded')
        plt.colorbar(im, ax=axs[i, 6])

    return plt
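
# A minimal usage sketch for plot_spectrograms, assuming the raw complex cube has already
# been loaded elsewhere (the np.load path below is purely illustrative) and that config
# carries the interpolation sizes used throughout this file.
import numpy as np

data = np.load('datasets/raw_visibilities.npy')  # hypothetical path to the raw cube
config = {'n_frequencies': 32, 'n_time_steps': 128}
plot_spectrograms(data, config).show()
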
def data_generator(num_files):
    first_flag = False
    ms_files = get_files(filter='None')

    for i in tqdm(range(0,num_files)):
        c = next(ms_files)
        cubes = get_cube(c)
        p = preprocessor.preprocessor(cubes)
        p.interp(32, 128)

        if not first_flag:
            output = p.get_processed_cube()
            first_flag = True
        else:
            output = np.concatenate((output, p.get_processed_cube()), axis=0)

    if not os.path.exists('datasets'):
        os.mkdir('datasets')

    info = {'Description':'LOFAR training set',
            'Features':'Unlabelled' ,
            'Dimensions':(32,128),
            'Source':'LOFAR MS'}


    f_name = 'datasets/LOFAR_dataset_{}.pkl'.format(datetime.datetime.now().strftime("%d-%m-%Y"))
    pickle.dump([output,np.zeros([1,1,1,1]), np.zeros([1,1,1,1]),np.zeros([1,1,1,1]),info],
            open(f_name, 'wb'), protocol=4)
    print('{} Saved!'.format(f_name))
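
# A small sketch of reading back the pickle written by data_generator; the file name below
# is illustrative (it is whatever was printed above), and the list layout mirrors the dump
# call: the training cube, three placeholder arrays, then the info dict.
import pickle

with open('datasets/LOFAR_dataset_01-01-2021.pkl', 'rb') as f:  # hypothetical file name
    train_x, _, _, _, info = pickle.load(f)
print(info['Description'], train_x.shape)
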
Example #3
    def test_empty_lines_are_removed(self):
        grammar = """


        """
        self.assertEqual("\n" + preprocessor(grammar),
                         "" + self.permanent_suffix)
def evaluateSavedModel():
    # Choose saved model to evaluate
    model = load_model("saved/Piczak_CNN_pretrainsingle_trainmulti.h5")
    # Setup preprocessor for loading extracted features
    pp = preprocessor(parent_dir='../UrbanSound8K/audio')

    # extracted features that should be loaded to calculate mean and std values
    train_dirs = [
        "audio_overlap/folder1_overlap", "audio_overlap/folder3_overlap",
        "audio_overlap/folder4_overlap", "audio_overlap/folder5_overlap",
        "audio_overlap/folder6_overlap", "audio_overlap/folder7_overlap",
        "audio_overlap/folder8_overlap", "audio_overlap/folder9_overlap",
        "audio_overlap/folder10_overlap"
    ]

    #pp.data_prep(train_dirs=[], test_fold="fold2", load_path="../UrbanSound8K/audio/extracted_short_60/")
    # Load features
    test_folder = "fold2"
    pp.load_extracted_fts_lbs(train_dirs=train_dirs, test_fold=test_folder)

    tb = TensorBoard(
        log_dir='./TensorBoard/piczak_CNN_singlelabel_pretrain_continue_multilabel')
    # model.fit(pp.train_x, pp.train_y,validation_split=.1, epochs=25,
    #		  batch_size=256, verbose=2, callbacks=[tb])

    print("model evaluation")
    scores = model.evaluate(pp.test_x, pp.test_y, verbose=2)
    print("loss: {0}, test-acc: {1}".format(scores[0], scores[1]))

    # Make predictions
    preds = model.predict(pp.test_x)

    # Evaluate predictions
    evaluateModel(pp, preds, test_folder)
Example #5
    def test_parent_hierarchy(self):
        semtypes = {"#foo": 1}
        grammar = "FOO: BAR"
        self.assertEqual(
            preprocessor(grammar, semtypes), """FOO: "#foo" | (BAR)
eps_foo: FOO | empty_foo
empty_foo: """ + self.permanent_suffix)
Example #6
	def explain_timestep_distribution(self):
		"""
		Goal: Display info about the distribution of timesteps
		"""
		from scipy.stats import norm
		from scipy.stats import mode

		if not os.path.isfile(self.DATA_FILE_PATH): 
			p = preprocessor.preprocessor()
			p.preprocess(start_anew=True, quick_validation=True, display_epochs=False)
		data = h5py.File(self.DATA_FILE_PATH, 'r')
		timesteps = []
		for key in data.keys():
			timestep = data[key]['eeg'].shape[0]
			timesteps.append(float(timestep))
		
		# code adapted from http://stackoverflow.com/questions/20011122/fitting-a-normal-distribution-to-1d-data
		# Fit a normal distribution to the data:
		mu, std = norm.fit(timesteps)
		print("{} examples. Shortest # samples is {}. Longest # samples is {}. Mean is {:.2f}. Mode is {}. Standard Deviation is {:.2f}.".format(len(timesteps), min(timesteps), max(timesteps), mu, mode(timesteps)[0], std))
		plt.hist(timesteps, bins=100, density=True, alpha=0.6, color='g')

		# # Plot the PDF.
		# xmin, xmax = plt.xlim()

		# x = np.linspace(xmin, xmax, 100)
		# p = norm.pdf(x, mu, std)
		# plt.plot(x, p, 'k', linewidth=2)
		# sampling frequency -> 1100 samples per second * x seconds = 2500 samples, solve for x
		title = "Fit results (# samples at 1100 Hz sfreq): mu = %.2f,  std = %.2f" % (mu, std)
		plt.title(title)
		plt.show()
		data.close()	
	def parse(self, string):
		global err,syn_error
		string2 = preprocessor.preprocessor(string)
		if (string != string2):
			print("Program code after preprocessing: ", string2)
			string=string2
		from preprocessor import err as err
		from preprocessor import syn_error as syn_error
		#print(string)
		lexer=MU0Parser.__Lexer()
		self.parser.parse(string, lexer=lexer.lexer)
		#print(syn_error,err)
		if MU0Parser.s.length()==0 and MU0Parser.brstack.length()==0 and MU0Parser.contstack.length()==0 and MU0Parser.forstack.length()==0 and syn_error==False:
			err=''
			err=err+"No syntax errors detected."
			print("No syntax errors detected.")

		else:
			if MU0Parser.s.length()==0:
				print("Syntax error detected - continue/break used outside loop")
			else: 
				print("Syntax error detected.")

		ins=MU0Parser.ins
		data=MU0Parser.data
		self.restart()

		return ins + list(data)
Example #8
 def __init__(self):
     self.filename = os.path.join(os.path.expandvars("%appdata%"),
                                  "latex-access.conf")
     self.speech_translator = speech.speech()
     self.preprocessor = preprocessor.preprocessor()
     self.activateSettings()
     self.newcommands = preprocessor.newcommands(self.preprocessor)
Example #9
    def test_if_epsilon_nonterminal_was_added_for_terminal(self):
        grammar1 = """
            TERMINAL: "foo"
        """
        self.assertEqual(
            preprocessor(grammar1).strip(),
            'TERMINAL:("foo")\neps_terminal: TERMINAL | empty_terminal\nempty_terminal: '
            + self.permanent_suffix)

        grammar2 = """
            TERMINAL: "foo"
            TERMINAL: "bar"
        """
        self.assertEqual(
            preprocessor(grammar2).strip(),
            'TERMINAL:("foo")|("bar")\neps_terminal: TERMINAL | empty_terminal\nempty_terminal: '
            + self.permanent_suffix)
Example #10
 def test_if_coordination_is_added_for_single_suffix(self):
     grammar = """
     t_attr_single: t_quality* ATTR
     """
     self.assertEqual(
         preprocessor(grammar),
         "t_attr_single:(t_quality* ATTR)\nt_attr: (t_attr_single) | ((t_attr_single \",\")+ t_attr_single) | ((t_attr_single \",\")* t_attr_single COORD_A t_attr_single)"
         + self.permanent_suffix)
Example #11
 def test_merge_of_two_left_sides(self):
     """ Test if the left sides are merged if they are across multiple lines """
     grammar = """
         sentence: foo
         sentence: bar
     """
     self.assertEqual(
         preprocessor(grammar).strip(),
         'sentence:(foo)|(bar)' + self.permanent_suffix)
def to_magnitude(complex_data):
    data = np.array([complex_data.real, complex_data.imag])
    data = np.swapaxes(np.array(data), 0, -1)
    p = preprocessor(np.expand_dims(data, axis=0))
    p.interp(32, 128)  # interpolate
    p.get_magnitude()
    # drop the leading batch axis that was added by expand_dims
    return p.get_processed_cube()[0, ...]
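
# Usage sketch for to_magnitude, assuming complex_data is a single baseline's complex
# spectrogram (the random array below only stands in for real visibilities).
import numpy as np

complex_data = np.random.randn(64, 256) + 1j * np.random.randn(64, 256)
magnitude = to_magnitude(complex_data)
print(magnitude.shape)  # interpolated onto the (32, 128) grid used throughout
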
Example #13
 def test_order_of_rules_is_untouched(self):
     """ Test if the content of rule is untouched """
     grammar = """
     sentence: foo
     foo: bar
     """
     self.assertEqual(
         preprocessor(grammar).strip(),
         'sentence:(foo)\nfoo:(bar)' + self.permanent_suffix)
def piczac_cross_validation(epochs, load_path):
    train_dirs = []

    n_folders = 10

    for i in range(1, n_folders + 1):
        #train_dirs.append('fold{0}'.format(i))
        train_dirs.append('folder{0}_overlap'.format(i))

    print(train_dirs)
    for fold in ((10, 1), (1, 2), (2, 3), (3, 4), (4, 5), (5, 6), (6, 7),
                 (7, 8), (8, 9), (9, 10)):
        val_fold = 'folder{0}_overlap'.format(fold[0])
        test_fold = 'folder{0}_overlap'.format(fold[1])
        #val_fold = 'fold{0}'.format(fold[0])
        #test_fold = 'fold{0}'.format(fold[1])
        train_dirs.remove(val_fold)
        train_dirs.remove(test_fold)

        pp = preprocessor(parent_dir='../../data/UrbanSound8K/audio')
        pp.load_extracted_fts_lbs(train_dirs=train_dirs,
                                  val_fold=val_fold,
                                  test_fold=test_fold,
                                  load_path=load_path)

        model = piczak_CNN_multi(input_dim=pp.train_x[0].shape,
                                 output_dim=pp.train_y.shape[1])
        print("done")
        print("OPTIMIZER")
        #print(model.optimizer.lr)
        #K.set_value(model.optimizer.lr, 0.002)
        #model.optimizer.lr.set_value(0.0001)
        #model.save('Models/model1_all_p2_bn{0}.h5'.format(str(fold)))
        #model = load_model('Models/model1_all_p2{0}.h5'.format(str(fold)))
        #model = load_model('Models/model1_all_p2_bnsec_overlap_{0}.h5'.format(str(fold)))

        tb = TensorBoard(log_dir='./TensorBoard/' +
                         'overlap_run{0}'.format(fold[1]))
        es = EarlyStopping(patience=10, verbose=1)

        model.fit(pp.train_x,
                  pp.train_y,
                  validation_data=[pp.val_x, pp.val_y],
                  epochs=epochs,
                  batch_size=1000,
                  verbose=2,
                  callbacks=[tb, es])
        #model.save('Models/model1_all_p2_bnsec_overlap_9010_{0}.h5'.format(str(fold)))

        preds = model.predict(pp.test_x)
        evaluateModel(pp, preds, fold)

        K.clear_session()

        train_dirs.append(val_fold)
        train_dirs.append(test_fold)
Example #15
 def test_merge_of_two_left_sides_with_inserted_lines(self):
     grammar = """
         sentence: foo
         // this is my comment
         foo: bar
         sentence: bar
     """
     self.assertEqual(
         preprocessor(grammar).strip(),
         'sentence:(foo)|(bar)\nfoo:(bar)' + self.permanent_suffix)
Example #16
def plot_scatter(autoencoder, data):
    """
        This function plots a 2d scatter plot with the data superimposed over each point
        autoencoder (keras.model) the autoencoder based model 
        data (np.array) the preprocessed training data that is in a list format. With the first index being magnitude and the second phase. 
    """
    plt.rcParams['image.cmap'] = 'viridis'
    encoder,mag_phase_flag  = load_encoder(autoencoder)

    if not mag_phase_flag:
        mag_data = data
        p = preprocessor(mag_data)
        it = 1
    else: 
        mag_data = data
        mag_data = [mag_data[0],
                    mag_data[1]]
        it = 2

    fig, ax = plt.subplots(1, it, figsize=(20, 10))
    if it == 1:
        ax = [ax]  # wrap the single Axes so that ax[i] indexing works below
    for i in range(it):
        p = preprocessor(mag_data[i])

        embeddings,_,_ =  encoder.predict(mag_data)
        p.interp(20,20)
        _data = p.get_processed_cube()

        for x, y, image_path in zip(embeddings[:,0], embeddings[:,1], _data[...,0]):
           imscatter(x, y, image_path, zoom=0.7, ax=ax[i]) 
        ax[i].title.set_text(titles[i]);
        ax[i].grid();
        ax[i].set_xlim([-6,6])
        ax[i].set_ylim([-6,6])

    plt.suptitle('Scatter Plot of Embedding with Inputs Overlayed');
    plt.savefig('/tmp/temp.png',dpi=600)
    img=mpimg.imread('/tmp/temp.png')
    fig = plt.figure(figsize=(10, 10))
    plt.imshow(img) 
    plt.axis('off')
    return plt
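
# A usage sketch for plot_scatter, assuming a trained autoencoder checkpoint and the
# interpolated magnitude/phase cubes produced elsewhere in this file; every name below is
# a placeholder, not a real artefact of this repository.
autoencoder = load_model('saved/lofar_autoencoder.h5')  # hypothetical checkpoint
plot_scatter(autoencoder, [mag_interp, phase_interp]).show()
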
Example #17
	def num_examples(self):
		"""
		Goal: Return the number of examples in the data file
		"""
		if not os.path.isfile(self.DATA_FILE_PATH): 
			p = preprocessor.preprocessor()
			p.preprocess(start_anew=True, quick_validation=True, display_epochs=False)
		data = h5py.File(self.DATA_FILE_PATH, 'r')
		num_examples = len(list(data.keys()))
		data.close()
		return num_examples
def plot_preprocessing(data):
    fig, ax = plt.subplots(1, 4)
    r = randint(0, data.shape[0])

    p = preprocessor(data)
    p.interp(config['n_frequencies'],
             config['n_time_steps'])  # always interpolate
    p.get_magnitude_and_phase()

    d = p.get_cube()

    #Interpolated cube
    im = ax[0].imshow(d[r, ..., 0])
    ax[0].title.set_text('Original Interpolated Image')
    fig.colorbar(im, ax=ax[0])

    #Magnitude of original cube
    p.get_magnitude_and_phase()
    p_cube = p.get_processed_cube()
    im = ax[1].imshow(p_cube[r, ..., 0])
    fig.colorbar(im, ax=ax[1])
    ax[1].title.set_text('Magnitude of Interpolated Image')

    p.sigma_threshold(2)

    # Standardised cube
    p.standardise(per_baseline=config['per_baseline'])
    s_cube = p.get_processed_cube()
    im = ax[2].imshow(s_cube[r, ..., 0])
    ax[2].title.set_text('Standardised Interpolated Image')
    fig.colorbar(im, ax=ax[2])

    # Minmax scaled cube
    p = preprocessor(p_cube)
    p.minmax(per_baseline=config['per_baseline'])
    m_cube = p.get_processed_cube()
    im = ax[3].imshow(m_cube[r, ..., 0])
    ax[3].title.set_text('Min Max Interpolated Image')
    fig.colorbar(im, ax=ax[3])

    return plt
Example #19
def filter_and_dict(table_of_strings, stop_words):
    data = []
    for tweet in table_of_strings:
        tweet_words = preprocessor().preprocess(tweet[0], stop_words)
        temp_dict = {}
        for word in tweet_words:
            if word in temp_dict:
                temp_dict[word] += 1
            else:
                temp_dict[word] = 1
        data.append(temp_dict)
    return data
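
# A quick sketch of what filter_and_dict produces, assuming each row is a one-element
# tuple containing the tweet text, as returned by the database queries further down.
tweets = [("Heavy traffic on the M4 this morning",), ("Lovely weather today",)]
bags_of_words = filter_and_dict(tweets, stop_words=[])
print(bags_of_words)  # one {word: count} dict per tweet
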
Example #21
    def test_generate_merged_terminals_wo_naives(self):
        semtypes = {'#floskule^#measure': 1}
        grammar = ""

        self.assertEqual(
            preprocessor(grammar, semtypes),
            """FLOSKULE: "#floskule" | "#floskule^#measure"
MEASURE: "#floskule^#measure" | "#measure"
eps_floskule: FLOSKULE | empty_floskule
empty_floskule: 
eps_measure: MEASURE | empty_measure
empty_measure: """ + self.permanent_suffix)
Example #22
def piczac_cross_validation(epochs, load_path):
    train_dirs = []

    n_folders = 10
    for i in range(1, n_folders + 1):
        train_dirs.append('fold{0}'.format(i))

    cvscores = []
    for folds in [(9, 10)]:
        val_fold = 'fold' + str(folds[0])
        test_fold = 'fold' + str(folds[1])
        # Remove validation and test from train
        train_dirs.remove(val_fold)
        train_dirs.remove(test_fold)

        print("Run {0}: test folder is fold{0}".format(folds[1]) +
              ", validation folder is fold{0}".format(folds[0]))

        # tb = TensorBoard(log_dir='./TensorBoard/short_60/' + 'run{0}'.format(folds[1]))
        es = EarlyStopping(patience=10, verbose=1)

        pp = preprocessor()
        pp.load_extracted_fts_lbs(load_path=load_path,
                                  train_dirs=train_dirs,
                                  test_fold=test_fold,
                                  val_fold=val_fold)
        train_dirs.append(val_fold)
        train_dirs.append(test_fold)
        print("Data prep completed")

        model = piczak_CNN(input_dim=pp.train_x[0].shape,
                           output_dim=pp.train_y.shape[1])
        print("Model built")

        model.fit(pp.train_x,
                  pp.train_y,
                  validation_data=[pp.val_x, pp.val_y],
                  epochs=epochs,
                  batch_size=1000,
                  verbose=2,
                  callbacks=[es])
        print("Model trained")

        output_model_file = 'models/long60_' + str(epochs) + '_' + str(
            folds) + '.h5'
        model.save(output_model_file)
        scores = model.evaluate(pp.test_x, pp.test_y, verbose=0)
        print("loss: {0}, test-acc: {1}".format(scores[0], scores[1]))
        cvscores.append(scores[1] * 100)
        K.clear_session()

    print("Average performance after cross-validation: %.2f%% (+/- %.2f%%)" %
          (np.mean(cvscores), np.std(cvscores)))
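
# Hypothetical invocation of the cross-validation routine above; the feature directory is
# an assumption and should point at the extracted features on disk.
piczac_cross_validation(epochs=300,
                        load_path='../UrbanSound8K/audio/extracted_long200/')
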
def piczac_cross_validation(epochs, load_path):

    for fold in ((10, 1), (1, 2), (2, 3), (3, 4), (4, 5), (5, 6), (6, 7),
                 (7, 8), (8, 9), (9, 10)):
        val_fold = 'overlap/fold{0}_overlap_{1}dB'.format(
            fold[0], volumeOverlay)
        test_fold = 'overlap/fold{0}_overlap_{1}dB'.format(
            fold[1], volumeOverlay)
        single_fold = 'fold{0}'.format(fold[1])
        train_dirs.remove(val_fold)
        train_dirs.remove(test_fold)
        model = load_model('Models/short60_300_{0}.h5'.format(str(fold)))
        pp = preprocessor(
            parent_dir='C:\\Deep Learning Dataset\\UrbanSound8K\\audio_overlap'
        )
        pp.load_extracted_fts_lbs(load_path=load_path,
                                  train_dirs=train_dirs,
                                  val_fold=val_fold,
                                  test_fold=test_fold,
                                  single_fold=single_fold)

        scores = model.evaluate(pp.test_x, pp.test_y, verbose=0)
        print("Test_fold: {2} Pretrain overlap - loss: {0}, test-acc: {1}".
              format(scores[0], scores[1], fold[1]))
        scores = model.evaluate(pp.single_x, pp.single_y, verbose=0)
        print(
            "Test_fold: {2} Pretrain single - loss: {0}, test-acc: {1}".format(
                scores[0], scores[1], fold[1]))

        tb = TensorBoard(log_dir='./TensorBoard/' +
                         'overlap_run{0}'.format(fold[1]))
        es = EarlyStopping(patience=10, verbose=1)

        model.fit(pp.train_x,
                  pp.train_y,
                  validation_data=[pp.val_x, pp.val_y],
                  epochs=epochs,
                  batch_size=1000,
                  verbose=0,
                  callbacks=[tb, es])
        scores = model.evaluate(pp.test_x, pp.test_y, verbose=0)
        print("Test_fold: {2} Posttrain - loss: {0}, test-acc: {1}".format(
            scores[0], scores[1], fold[1]))
        scores = model.evaluate(pp.single_x, pp.single_y, verbose=0)
        print("Test_fold: {2} Posttrain single - loss: {0}, test-acc: {1}".
              format(scores[0], scores[1], fold[1]))
        K.clear_session()

        train_dirs.append(val_fold)
        train_dirs.append(test_fold)
Example #24
    def preprocess(self,
                   data,
                   is_cat=[],
                   num_quantiles=20,
                   weighted=False,
                   nthread=-1):
        self.prep = preprocessor()
        IDs, X, w, delta = self.prep.preprocess(data=data,
                                                is_cat=is_cat,
                                                num_quantiles=num_quantiles,
                                                weighted=weighted,
                                                nthread=nthread)

        self.X_colnames = X.columns.values.tolist()
        self.X_colnames = [
            item if item != 't_start' else 'time' for item in self.X_colnames
        ]

        return IDs, X, w, delta
Example #25
def main():
    #Send in the clowns!!
    cmdline = sys.argv
    fil = ""
    translated = ""
    output_file = "zzzz_output.bridge"
    exec_code = False
    processCommandline(cmdline)
    #print(cmdline)
    if len(cmdline) == 1:
        print("Usage:  lark_bridge file_to_translate [options]")
        print("You must pass a file to translate!")
        sys.exit(1)
    else:
        fil = open(cmdline[1], "r", encoding=Config.encoding).read()

    preprocessed = preprocessor.preprocessor(fil)
    translated = bridge(preprocessed)  #,True)
    print(translated)
    if exec_code:
        exec(translated)
def plot_confusion_matrix(model_filename, load_path, save=False):
    classes = ['air_conditioner', 'car_horn', 'children_playing', 'dog_bark', 'drilling', 'engine_idling', 'gun_shot',
               'jackhammer', 'siren', 'street_music']

    model = load_model(model_filename)
    n_folders = 10
    train_dirs = []
    for i in range(1, n_folders + 1):
        train_dirs.append('fold{0}'.format(i))
    test_fold = 'fold' + model_filename.split(', ')[1].split(')')[0]
    val_fold = 'fold' + model_filename.split(', ')[0].split('(')[1]
    # example: long200_150_(1, 2).h5  (str(fold_tuple) keeps the space after the comma)

    pp = preprocessor()
    pp.load_extracted_fts_lbs(train_dirs=train_dirs, test_fold=test_fold, val_fold=val_fold, load_path=load_path)
    preds = model.predict_classes(pp.train_x, verbose=0)
    # write_preds(preds, output_predictions_file)
    cm = metrics.confusion_matrix(np.argmax(pp.train_y, axis=1), preds)
    if save:
        utils.save_confusion_matrix(cm, classes)
    else:
        utils.plot_confusion_matrix(cm, classes)
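
# Usage sketch: the fold tuple is parsed back out of the model file name, so the name must
# follow the save pattern used during cross-validation; the paths below are assumptions.
plot_confusion_matrix('models/long60_300_(1, 2).h5',
                      load_path='../UrbanSound8K/audio/extracted_long200/',
                      save=False)
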
	def __init__(self):
		self.__preprocessor = preprocessor()
		self.__imgsize = 0
		self.__kernels = []
		self.__kernel_indx = []
		self.__path = ""
Example #28
def preprocessor():  # preprocess data from fetch
    for file in getFile('result'):
        pre.preprocessor(file)
    def encode(self, complex_db_cube):
        """
        Encodes the cube with a pretrained encoding Keras model that is specified in settings.
        :param complex_db_cube: numpy.array with shape (baseline,subband,timestamp,pol
        :return: numpy.array with shape (baseline,D) where D is dependent on input shape size and model.
        """
        if complex_db_cube is None or len(complex_db_cube.shape) != 4:
            raise ValueError(
                'Data is not in correct format: numpy.array with shape (baseline,subband,timestamp,pol)'
            )

        print('Complex cube shape before component selection: {}'.format(
            complex_db_cube.shape))
        ##################################
        # keep only components 0 and 4 of the polarisation axis
        complex_db_cube = np.concatenate(
            [complex_db_cube[..., 0:1], complex_db_cube[..., 4:5]], axis=3)
        print('Complex cube shape after component selection: {}'.format(
            complex_db_cube.shape))
        ##################################
        p = preprocessor.preprocessor(complex_db_cube)
        p.interp(self.config['n_frequencies']['value'],
                 self.config['n_time_steps']['value'])
        cube = p.get_processed_cube()

        if self.config['architecture']['value'] == 'skip':
            p.get_phase()
            phase_cube = p.get_processed_cube()
            p = preprocessor.preprocessor(cube)
            p.get_magnitude()
            p.median_threshold()
            p.minmax(per_baseline=True,
                     feature_range=(np.min(phase_cube), np.max(phase_cube)))
            encoded, _, _ = self.encoder.predict(
                [p.get_processed_cube(), phase_cube])
            return encoded.reshape(encoded.shape[0],
                                   np.product(encoded.shape[1:]))

        elif self.config['mag_phase']['value']:
            p.get_magnitude_and_phase()

        elif self.config['magnitude']['value']:
            p.get_magnitude()
            p_cube = p.get_processed_cube()
            self.config['n_layers']['value'] = p_cube.shape[
                -1]  # TODO: This might cause problems
        elif self.config['phase']['value']:
            p.get_phase()
            p_cube = p.get_processed_cube()
            self.config['n_layers']['value'] = p_cube.shape[
                -1]  # TODO: This might cause problems

        if self.config['median_threshold']['value']:
            p.median_threshold(
                per_baseline=self.config['per_baseline']['value'])

        if self.config['db']['value']:
            p.mag2db()

        if self.config['wavelets']['value']:
            p.wavelet_decomp_2D()

        if self.config['flag']['value']:
            #TODO
            raise Exception('Flagging Code Not Written')
        if self.config['freq']['value']:
            #TODO
            raise Exception('Frequency Domain Code Not Written')
        if self.config['standardise']['value']:
            p.standardise(per_baseline=self.config['per_baseline']['value'])

        elif self.config['minmax']['value']:
            p.minmax(per_baseline=self.config['per_baseline']['value'])

        real_cube = p.get_processed_cube()

        #use preprocessor to reshape cubes
        if self.config['architecture']['value'] == 'vae':
            encoded, _, _ = self.encoder.predict(real_cube)
        else:
            encoded = self.encoder.predict(real_cube)
        #encoded  = np.mean(real_cube,axis=3)[:,::4,::4]
        print('Encoded shape: {}'.format(encoded.shape))
        return encoded.reshape(encoded.shape[0], np.product(encoded.shape[1:]))
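
# A hedged sketch of calling encode on a randomly generated complex cube with the
# (baseline, subband, timestamp, pol) layout required by the docstring; `model` stands in
# for whatever object owns this method, with its encoder and config already set up.
import numpy as np

cube = np.random.randn(10, 64, 256, 8) + 1j * np.random.randn(10, 64, 256, 8)
embeddings = model.encode(cube)
print(embeddings.shape)  # (baseline, D)
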
import nltk.data
from preprocessor import preprocessor

# Classify the text from the Search API
classifier = nltk.data.load("classifiers/naive_bayes.pickle")
text = preprocessor().preprocess(textt, [])
test = features_extractor(text)
label = classifier.classify(test)

# Find its probability
if label == 'traffic':
	probability_dict = classifier.prob_classify(test)
	probability = probability_dict.prob('traffic')

#
Example #31
 def test_commands_are_left_untouched(self):
     grammar = "\%ignore abc"
     self.assertEqual(preprocessor(grammar),
                      '\%ignore abc' + self.permanent_suffix)
def trainClassifier(conn, cursor, tablename, test_tweet, enable_evaluation):
	"""Train the Naive Bayes"""
	
	stop_words = []
	
	# Fetch all the stop words
	# try:
		# query_sw = "SELECT word FROM stop_words limit 35"
		# cursor.execute(query_sw)
		# sw = cursor.fetchall()
		# stop_words = filter_tweets(sw)
		# print(stop_words)
	# except:
		# Get the most recent exception
		# exceptionType, exceptionValue, exceptionTraceback = sys.exc_info()
		# print "Select Error -> %s" % exceptionValue
		# lastid="0"
	
	# Fetch all the traffic tweets
	try:
		query_pt = "SELECT tweet FROM "+ tablename +" WHERE ptraffic='y' ORDER BY tid ASC LIMIT 681"
		cursor.execute(query_pt)
		ttweets = cursor.fetchall()
	except:
		# Get the most recent exception
		exceptionType, exceptionValue, exceptionTraceback = sys.exc_info()
		print "Select Error -> %s" % exceptionValue
		lastid="0"
	
	# Fetch all the non-traffic tweets	
	try:
		query_nt = "SELECT tweet FROM "+ tablename +" WHERE ntraffic='y' ORDER BY tid ASC LIMIT 681"
		cursor.execute(query_nt)
		nttweets = cursor.fetchall()
	except:
		# Get the most recent exception
		exceptionType, exceptionValue, exceptionTraceback = sys.exc_info()
		print "Select Error -> %s" % exceptionValue
		lastid="0"
	
	# If the user chose to evaluate the classifier fetch more labelled tweets for testing
	if enable_evaluation == 'test':
		# Fetch all the traffic tweets for the evaluation
		try:
			query_pt = "SELECT tweet FROM "+ tablename +" WHERE ptraffic='y' ORDER BY tid DESC LIMIT 375"
			cursor.execute(query_pt)
			ttweets_test = cursor.fetchall()
		except:
			# Get the most recent exception
			exceptionType, exceptionValue, exceptionTraceback = sys.exc_info()
			print "Select Error -> %s" % exceptionValue
			lastid="0"
		
		# Fetch all the non-traffic tweets for the evaluation
		try:
			query_nt = "SELECT tweet FROM "+ tablename +" WHERE ntraffic='y' ORDER BY tid DESC LIMIT 375"
			cursor.execute(query_nt)
			nttweets_test = cursor.fetchall()
		except:
			# Get the most recent exception
			exceptionType, exceptionValue, exceptionTraceback = sys.exc_info()
			print "Select Error -> %s" % exceptionValue
			lastid="0"
		
		
	try:
	
		# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
		# >>>>>>>>>>>>>>>>>>>>>>>>>> TRAIN SET <<<<<<<<<<<<<<<<<<<<<<<<<<
		# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
		# Apply preprocessing on the traffic tweets for the train set
		data=[]
		for text in ttweets:
			temp = preprocessor().preprocess(text[0],stop_words)
			data.append(temp)
		traffic_tweets=add_label(data, 'traffic')
		
		# Apply preprocessing on the non-traffic tweets for the train set
		data=[]
		for text in nttweets:
			temp = preprocessor().preprocess(text[0],stop_words)
			data.append(temp)
		nontraffic_tweets=add_label(data, 'nontraffic')
		
		# Merge the tweets for the train set
		combined_tweets = traffic_tweets + nontraffic_tweets

		# Extract the features for the train set
		temp = []
		for i in range(len(combined_tweets)):
			temp.append(((features_extractor(combined_tweets[i][0])),combined_tweets[i][1]))
		train_set=temp
		
		# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
		# >>>>>>>>>>>>>>>>>>>>>>>>>> TEST SET <<<<<<<<<<<<<<<<<<<<<<<<<<<
		# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
		
		# If the user chose to evaluate the classifier create a test_set
		if enable_evaluation == 'test':
			# Apply preprocessing on the traffic tweets for the test set
			data=[]
			for text in ttweets_test:
				temp = preprocessor().preprocess(text[0],stop_words)
				data.append(temp)
			traffic_tweets_test=add_label(data, 'traffic')
			
			# Apply preprocessing on the non-traffic tweets for the test set
			data=[]
			for text in nttweets_test:
				temp = preprocessor().preprocess(text[0],stop_words)
				data.append(temp)
			nontraffic_tweets_test=add_label(data, 'nontraffic')
			
			# Merge the tweets for the test set
			combined_tweets_test = traffic_tweets_test + nontraffic_tweets_test
			
			# Extract the features for the test set
			temp = []
			for i in range(len(combined_tweets_test)):
				temp.append(((features_extractor(combined_tweets_test[i][0])),combined_tweets_test[i][1]))
			test_set=temp
		
		# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
		# >>>>>>>>>>>>>>>>>>>>>>>>>> TRAIN THE CLASSIFIER <<<<<<<<<<<<<<<<<<<<<<<<<<<
		# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
		
		# Train our classifier using the training set
		classifier = nltk.NaiveBayesClassifier.train(train_set)
		
		# Save the classifier in a .pickle file
		name = 'naive_bayes.pickle'
		fname = os.path.join(os.path.expanduser('~/nltk_data/classifiers'), name)
		dump_classifier(classifier, fname)
		
		# Classify the tweet
		test_tweet1 = preprocessor().preprocess(test_tweet,stop_words)
		test = features_extractor(test_tweet1)
		proba = classifier.prob_classify(test)
		print "\nThe tweet '%s' is about: %s with probability: %s\n" % (test_tweet, classifier.classify(test),proba.prob('traffic'))
		
		# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
		# >>>>>>>>>>>>>>>>>>>>>>>>>> TEST THE CLASSIFIER <<<<<<<<<<<<<<<<<<<<<<<<<<<
		# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
		
		# If the user chose to evaluate the classifier apply the evaluation techniques
		if enable_evaluation == 'test':
			evaluation(test_set,classifier)
	
		
	except:	
		# Get the most recent exception
		exceptionType, exceptionValue, exceptionTraceback = sys.exc_info()
		print "Error -> %s" % exceptionValue
		lastid="0"
Example #33
def t_SOURCE(t):
    r'[^(\-\>|\|\#|\,)].*'

    scope = 0
    token_lexpos = 0
    dquotes = 0
    squotes = 0

    # i.e. 0 if token is "1" in "1 -> print" and 5 if token is "print" in "1 -> print"
    # TODO: .find() overlapping in `1 -> 1 -> 1`, .rfind() not, is .rfind() safe?
    relative_lexpos = t.lexer.lexdata.rfind(t.value)

    # Iterate over the token
    while token_lexpos < len(t.value):
        char = t.value[token_lexpos]

        # Inside " ... " or ' ... ' ?
        if dquotes or squotes:
            if char == '\"':
                if dquotes:
                    dquotes = dquotes - 1
            if char == '\'':
                if squotes:
                    squotes = squotes - 1
            # Ignore other characters
        else:
            # `[]`, `()` and `{}`
            if char in '[({':
                scope = scope + 1
            if char in '])}':
                scope = scope - 1

            # `"` and `'`
            if char == '\"':
                dquotes = dquotes + 1
            if char == '\'':
                squotes = squotes + 1

            # i.e. `... X` and not `[ ... X ]`, `( ... X )`, `{ ... X }`, `" ... X"` or `' ... X'`
            if not dquotes and not squotes and not scope:
                # Comma
                if char == ',':
                    # END OF TOKEN (i.e. 1 -> print , ... )
                    break
                # Operator
                if char == '|' or \
                        (char == '-' and token_lexpos + 1 < len(t.value) and t.value[token_lexpos + 1] == '>'):
                    # END OF TOKEN (i.e. 1 -> ...)
                    break

        token_lexpos = token_lexpos + 1

    # Did we break early?
    if token_lexpos != len(t.value):
        # Calculate a new lexpos
        t.lexer.lexpos = relative_lexpos + token_lexpos

    # Send the strip()'ed token to the preprocessor
    t.type, t.value = preprocessor.preprocessor(t.value[:token_lexpos].rstrip(), stmt_as_is=t.lexer.stmt_as_is)

    return t
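
# The loop above is a quote- and bracket-aware scan for the first top-level delimiter
# (`,`, `|` or `->`). The standalone sketch below reproduces that scanning rule outside of
# PLY so it can be exercised in isolation; it is an illustration, not the lexer itself.
def find_top_level_delimiter(value):
    scope = dquotes = squotes = 0
    for pos, char in enumerate(value):
        if dquotes or squotes:
            # inside a quoted region: only closing quotes matter
            if char == '"' and dquotes:
                dquotes -= 1
            if char == "'" and squotes:
                squotes -= 1
            continue
        if char in '[({':
            scope += 1
        if char in '])}':
            scope -= 1
        if char == '"':
            dquotes += 1
        if char == "'":
            squotes += 1
        if not dquotes and not squotes and not scope:
            if char == ',':
                return pos
            if char == '|' or (char == '-' and pos + 1 < len(value) and value[pos + 1] == '>'):
                return pos
    return len(value)

print(find_top_level_delimiter('print("a -> b") -> out'))  # 16: the `->` outside the string
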
Example #34
from crldriver import crldriver
from SQLinterface import SQLinterface
from preprocessor import preprocessor

crldriver = crldriver(headless=True)
p = preprocessor()
interface = SQLinterface(passwd='0000', dbname='mining')
#interface.push('keywords', type1 = 1, type2 = 1, word = 'test', count = 1, date = '2020.07.29')
#interface.showall('keywords')
#interface.init_table('keywords')
#interface.init_id('keywords')
#interface.delete_column('keywords', 'date')
#interface.add_column('keywords', 'date', 'DATE')
#interface.new_table('testtb')
#interface.show_table()
#interface.delete_table('testtb')


def dbupload(packet, type1, type2):
    for m in packet.keys():
        kwrds = interface.showall('Keywords')
        interface.init_table('Keywords')
        interface.init_id('Keywords')
        searched = {}
        for i in kwrds:
            searched[i['word']] = i['count']
        #print(searched)

        for n in packet[m]:
            L = list(p.keywording(n))
            for i in L:
 def wavelet_decomp(self, data):
     p = preprocessor.preprocessor(data)
     p.wavelet_decomp_2D()
     return p.get_processed_cube()
Example #36
 def test_parentheses_are_added_to_right_side(self):
     """ Test if the parentheses are added to the right side of the rule """
     grammar = "   sentence: foo  "
     self.assertEqual(
         preprocessor(grammar).strip(),
         'sentence:(foo)' + self.permanent_suffix)
Example #37
def main(args):
    # Build mask parameters DataFrame
    df_params = pd.DataFrame({"mask_name" : args.mask_name,\
                              "slice_axis" : args.slice_axis,\
                              "n_patches" : args.n_patches,\
                              "overlap"  : args.overlap, \
                              "rotation" : args.rotation})
    #     print(df_params)

    mpl.use(args.mpl_agg)

    data_io.show_header()
    if not os.path.exists(args.seg_path): os.makedirs(args.seg_path)
    if args.run_seg:

        # Understand input data format
        if os.path.isdir(args.ct_fpath):
            tiff_input = True
        elif args.ct_fpath.split('.')[-1] in ("hdf5", "h5"):
            tiff_input = False
            if args.ct_data_tag == "":
                raise ArgumentTypeError("dataset-name required for hdf5")
        else:
            raise ArgumentTypeError(
                "input file type not recognized. must be tiff folder or hdf5 file"
            )

        ct_dfile = data_io.DataFile(args.ct_fpath, \
                                    tiff = tiff_input,\
                                    data_tag = args.ct_data_tag, \
                                    VERBOSITY = args.rw_verbosity)
        ct_dfile.show_stats()
        chunk_shape = ct_dfile.chunk_shape
        if args.stats_only:
            print("\nSet stats_only = False and start over to run program.")
            sys.exit()

        # Load model from model repo
        model_filename = os.path.join(args.model_path,
                                      args.model_name + '.hdf5')
        print("\nStarting segmentation mode ...")
        segmenter = Segmenter(model_filename=model_filename)

        print("Reading CT volume into memory...")
        dd = ct_dfile.read_full()

        if args.preprocess:
            print("\tPreprocessing volume...")
            if not os.path.exists("preprocessor.py"):
                input(
                    "Looked for preprocessor.py, but not found! Please create one and press enter. Or press CTRL+C to exit"
                )

            from preprocessor import preprocessor
            dd = preprocessor(dd)

        for idx, row in df_params.iterrows():  # iterate over masks

            # assign arguments from df_params for this mask
            slice_axis = row["slice_axis"]
            max_patches = row["n_patches"]
            segfile_tag = row["mask_name"]
            overlap = row["overlap"]
            rotation = row["rotation"]

            # define DataFile object for mask
            seg_fname = os.path.join(args.seg_path, segfile_tag)
            if not args.tiff_output: seg_fname = seg_fname + ".hdf5"
            seg_dfile = data_io.DataFile(seg_fname, \
                                         data_tag = "SEG",\
                                         tiff = args.tiff_output, \
                                         d_shape = ct_dfile.d_shape, \
                                         d_type = np.uint8, \
                                         chunk_shape = chunk_shape,\
                                         VERBOSITY = args.rw_verbosity)
            seg_dfile.create_new(overwrite=args.overwrite_OK)

            t0 = time.time()

            print("\nWorking on %s\n" % segfile_tag)
            ch = process_data(dd, segmenter, \
                              slice_axis = slice_axis, \
                              rot_angle = rotation, \
                              max_patches = max_patches, \
                              overlap = overlap, \
                              nprocs = args.nprocs, \
                              arr_split = args.arr_split,\
                              arr_split_infer = args.arr_split_infer,\
                              crops = args.crops)

            seg_dfile.write_full(ch)
            t1 = time.time()
            total_time = (t1 - t0) / 60.0
            print(
                "\nDONE on %s\nTotal time for generating %s mask: %.2f minutes"
                % (time.ctime(), segfile_tag, total_time))
            del slice_axis
            del max_patches
            del segfile_tag
            del rotation
            del ch

    if args.run_ensemble:
        print("\nStarting ensemble mode ...\n")

        t0 = time.time()
        # get the d_shape of one of the masks
        temp_fname = os.path.join(args.seg_path, df_params.loc[0, "mask_name"])
        if not args.tiff_output: temp_fname = temp_fname + ".hdf5"
        temp_ds = data_io.DataFile(temp_fname,
                                   data_tag="SEG",
                                   tiff=args.tiff_output,
                                   VERBOSITY=0)
        mask_shape = temp_ds.d_shape
        chunk_shape = temp_ds.chunk_shape
        if not args.run_seg: temp_ds.show_stats()
        del temp_ds
        del temp_fname

        if args.stats_only:
            print("\nSet stats_only = False and start over to run program.")
            sys.exit()

        vote_fname = os.path.join(args.seg_path, args.vote_maskname)
        if not args.tiff_output: vote_fname = vote_fname + ".hdf5"
        vote_dfile = data_io.DataFile(vote_fname, \
                                      tiff = args.tiff_output,\
                                      data_tag = "SEG",\
                                      d_shape = mask_shape, \
                                      d_type = np.uint8, \
                                      chunk_shape = chunk_shape,\
                                      VERBOSITY = args.rw_verbosity)
        vote_dfile.create_new(overwrite=args.overwrite_OK)

        slice_start = 0
        n_masks = len(df_params)
        pbar = tqdm(total=mask_shape[0])
        while slice_start < mask_shape[0]:
            ch = [0] * len(df_params)
            for idx, row in df_params.iterrows():
                seg_fname = os.path.join(args.seg_path, row["mask_name"])
                if not args.tiff_output: seg_fname = seg_fname + ".hdf5"


                seg_dfile = data_io.DataFile(seg_fname, \
                                             tiff = args.tiff_output, \
                                             data_tag = "SEG", \
                                             VERBOSITY = args.rw_verbosity)
                if mask_shape != seg_dfile.d_shape:
                    raise ValueError("Shape of all masks must be same")

                ch[idx], s = seg_dfile.read_chunk(axis = 0, \
                                                  slice_start = slice_start, \
                                                  max_GB = args.mem_thres/(n_masks))
            ch = np.asarray(ch)
            ch = np.median(ch, axis=0).astype(np.uint8)

            vote_dfile.write_chunk(ch, axis=0, s=s)
            del ch
            slice_start = s.stop
            pbar.update(s.stop - s.start)
        pbar.close()

        t1 = time.time()
        total_time = (t1 - t0) / 60.0
        print("\nDONE on %s\nTotal time for ensemble mask %s : %.2f minutes" %
              (time.ctime(), args.vote_maskname, total_time))

        if args.remove_masks:
            print("Intermediate masks will be removed.")
            for idx, row in df_params.iterrows():  # iterate over masks
                seg_fname = os.path.join(args.seg_path, row["mask_name"])
                if not args.tiff_output:
                    seg_fname = seg_fname + ".hdf5"
                    os.remove(seg_fname)
                else:
                    rmtree(seg_fname)

    if args.morpho_filt:
        print("\nApplying morphological operations on ensemble vote...")

        vote_fname = os.path.join(args.seg_path, args.vote_maskname)
        if not args.tiff_output: vote_fname = vote_fname + ".hdf5"
        vote_dfile = data_io.DataFile(vote_fname, \
                                      tiff = args.tiff_output,\
                                      data_tag = "SEG",\
                                      VERBOSITY = args.rw_verbosity)

        from ct_segnet.morpho import morpho_filter
        vol = vote_dfile.read_full()
        vol = morpho_filter(vol, radius = args.radius, \
                            ops = args.ops, \
                            crops = args.crops, \
                            invert_mask = args.invert_mask)
        vote_dfile.write_full(vol)

    return
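
# The segmentation script above only requires that a local preprocessor.py expose a
# preprocessor(volume) function returning an array with the same shape as the input CT
# volume. A minimal sketch of such a hook, assuming a simple percentile clip and rescale
# is what is wanted (that choice is an assumption, not part of the script):
import numpy as np

def preprocessor(volume):
    # clip intensity outliers and rescale the CT volume to [0, 1]
    lo, hi = np.percentile(volume, (1, 99))
    volume = np.clip(volume, lo, hi)
    return (volume - lo) / (hi - lo)
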
Example #38
import numpy as np 
from matplotlib import pyplot as plt
import matplotlib.cm as cm




import preprocessor
import mne

p = preprocessor.preprocessor()

raw = p.mne_open(p.triples[0][0], preload=True)
raw = raw.pick_types(eeg=True, meg=True)
# raw.drop_channels(['EEG061', 'EEG062', 'EEG063']) # drop mistakenly labeled HEOG, VEOG, ECG channels

# print(raw.info)
# print(raw.info['ch_names'])
raw.plot_projs_topomap()
# print(raw.info['chs'])

# eeg_picks = mne.pick_types(raw.info, eeg=True, meg=False)
# print(list(eeg_picks))
# print(list(eeg_picks))
# print(list(raw.info['ch_names']))
# print()
# print(raw.info['ch_names'] - eeg_picks)


# raw.drop_channels(raw.info['ch_names'] - eeg_picks)