Example #1
def prepare_dataset(sentences, word_to_id, char_to_id, tag_to_id, lower,
                    zeros):
    """
    Prepare the dataset. Return a list of lists of dictionaries containing:
        - word indexes
        - word char indexes
        - tag indexes
    """
    data = []
    for s in sentences:
        str_words = [w[0] for w in s]
        words = [
            word_to_id[normalise(w, lower, zeros)
                       if normalise(w, lower, zeros) in word_to_id
                       else '<UNK>']
            for w in str_words
        ]
        # Skip characters that are not in the training set
        chars = [[char_to_id[c] for c in w if c in char_to_id]
                 for w in str_words]
        caps = [cap_feature(w) for w in str_words]
        tags = [tag_to_id[w[-1]] for w in s]
        pos_tags = pos_feature(str_words)
        data.append({
            'str_words': str_words,
            'words': words,
            'chars': chars,
            'caps': caps,
            'tags': tags,
            'pos_tags': pos_tags,
        })
    return data
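The NER-style examples on this page (prepare_dataset above, and prepare_sentence, word_mapping and the network build further down) all pass raw tokens through a two-flag normalise(w, lower, zeros). A minimal sketch of what such a helper typically does, assuming lower controls lowercasing and zeros maps digits to '0'; the original repository's version may differ:

import re

def normalise(word, lower, zeros):
    # Hypothetical token normaliser: optionally map every digit to '0',
    # then optionally lowercase the result.
    if zeros:
        word = re.sub(r'\d', '0', word)
    return word.lower() if lower else word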
Example #2
    def nn_classify(self, N, test_lc, train_files):
        best_matches = []
        best_distances = []
        best_files = []
        # Read index of each lc file
        upto = 0
        for filename in train_files:
            #if upto % 200 == 0:
            #	print upto
            upto += 1
            # Read all the light curve data into an array
            lc_data = open(self._testdir + '/' + filename)

            lc_class = filename.strip().split('_')[0]
            lc = [[], []]
            for line in lc_data:
                line = line.strip().split(',')
                lc[0].append(float(line[0]))
                lc[1].append(float(line[1]))
            lc_data.close()
            normalise(lc)
            lc = sample(lc, 400)
            lc = distribute(lc)
            # Update the nearest neighbour
            distance = self._distance_fn(test_lc, lc)

            # Find insert point

            insert_point = 0
            found = False
            for insert_point, bd in enumerate(best_distances):
                if bd >= distance:
                    found = True
                    break
            if found or len(best_distances) == 0:
                best_distances.insert(insert_point, distance)
                best_matches.insert(insert_point, lc_class)
                best_files.insert(insert_point, filename)
            # Pop from the top of the list if it's too long
            if len(best_distances) > N:
                best_distances.pop()
                best_matches.pop()
                best_files.pop()

        # Compute nearest neighbor by majority
        near_count = {}
        for c in best_matches:
            if c not in near_count.keys():
                near_count[c] = 1
            else:
                near_count[c] += 1
        #print sorted(near_count.items(), key=itemgetter(1))
        return [
            sorted(near_count.items(), key=itemgetter(1))[-1][0], best_files
        ]
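The tally at the end of nn_classify picks the most frequent class among the N nearest matches. The same majority vote can be written compactly with collections.Counter; a self-contained sketch (tie-breaking may differ from the sorted()-based version above):

from collections import Counter

def majority_class(best_matches):
    # Return the most common class label among the nearest neighbours.
    return Counter(best_matches).most_common(1)[0][0]

# Three 'RRLyrae' votes beat two 'Cepheid' votes.
print(majority_class(['RRLyrae', 'Cepheid', 'RRLyrae', 'Cepheid', 'RRLyrae']))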
Example #3
	def nn_classify(self, N, test_lc, train_files):
		best_matches = []
		best_distances = []
		best_files = []
		# Read index of each lc file
		upto = 0
		for filename in train_files:
			#if upto % 200 == 0:
			#	print upto
			upto += 1
			# Read all the light curve data into an array
			lc_data = open(self._testdir + '/' + filename)
			
			lc_class = filename.strip().split('_')[0]
			lc = [[], []]
			for line in lc_data:
				line = line.strip().split(',')
				lc[0].append(float(line[0]))
				lc[1].append(float(line[1]))
			lc_data.close()
			normalise(lc)
			lc = sample(lc, 400)			
			lc = distribute(lc)
			# Update the nearest neighbour
			distance = self._distance_fn(test_lc, lc)
		
			# Find insert point
			
			insert_point = 0
			found = False
			for insert_point, bd in enumerate(best_distances):
				if bd >= distance:
					found = True
					break
			if found or len(best_distances) == 0:
				best_distances.insert(insert_point, distance)
				best_matches.insert(insert_point, lc_class)
				best_files.insert(insert_point, filename)
			# Pop from the top of the list if it's too long
			if len(best_distances) > N:
				best_distances.pop()
				best_matches.pop()
				best_files.pop()
		
		# Compute nearest neighbor by majority
		near_count = {}
		for c in best_matches:
			if c not in near_count.keys():
				near_count[c] = 1
			else:
				near_count[c] += 1
		#print sorted(near_count.items(), key=itemgetter(1))
		return [sorted(near_count.items(), key=itemgetter(1))[-1][0], best_files]
Example #4
def _raised_cosine(im):
    m, n = np.shape(im)
    w1 = np.cos(np.linspace(-np.pi / 2, np.pi / 2, m))
    w1 = w1[:, None]
    w2 = np.cos(np.linspace(-np.pi / 2, np.pi / 2, n))
    w = w1 * w2
    return utils.normalise(im * w)
Example #5
    def generate_forest(self, threshold=0.25, tree_chance=0.2):
        noise = []
        for i in range(self.width):
            noise.append([])
            for j in range(self.height):
                noise[i].append(0)

        PNFactory_forest = perlin.PerlinNoiseFactory(2,
                                                     octaves=3,
                                                     tile=(),
                                                     unbias=False)

        for i in range(self.width):
            for j in range(self.height):
                noise[i][j] = PNFactory_forest(i / self.width, j / self.height)

        noise1D = []
        for i in range(self.width):
            for j in range(self.height):
                noise1D.append(noise[i][j])
        _min = np.min(noise1D)
        _max = np.max(noise1D)
        for i in range(self.width):
            for j in range(self.height):
                v = utils.normalise(noise[i][j], _min, _max)
                if (v < threshold and self.grid[i][j].get_type()
                        in life.Tree.get_good_tiles()):
                    if (self.grid[i][j].food is None
                            and not self.grid[i][j].is_river
                            and np.random.random() < tree_chance):
                        self.grid[i][j].set_tree(
                            life.Tree(self.simu,
                                      self.grid[i][j],
                                      randomness=True))
Example #6
    def generate_elevation(self, start_tile=None):
        noise = []
        for i in range(self.width):
            noise.append([])
            for j in range(self.height):
                noise[i].append(0)

        PNFactory = perlin.PerlinNoiseFactory(2,
                                              octaves=4,
                                              tile=(),
                                              unbias=True)

        for i in range(self.width):
            for j in range(self.height):
                noise[i][j] = PNFactory(i / self.width, j / self.height)

        noise1D = []
        for i in range(self.width):
            for j in range(self.height):
                noise1D.append(noise[i][j])

        _min = np.min(noise1D)
        _max = np.max(noise1D)

        for i in range(self.width):
            for j in range(self.height):
                self.grid[i][j].elevation_raw = utils.normalise(
                    noise[i][j], _min, _max)
                self.grid[i][j].elevation = -3 + (
                    self.grid[i][j].elevation_raw * 11)
                self.grid[i][j].set_type_from_elevation()
                if self.grid[i][j].get_type() == "SHALLOW_WATER":
                    self.shallow_water_tiles.append(self.grid[i][j])
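The map-generation examples above and the draw_healthbar example later call a three-argument utils.normalise(value, _min, _max). A minimal sketch of the min-max rescaling they appear to rely on (the real utils module may behave differently):

def normalise(value, min_value, max_value):
    # Rescale value into [0, 1]; a degenerate range maps to 0.0.
    if max_value == min_value:
        return 0.0
    return (value - min_value) / (max_value - min_value)

print(normalise(2.5, 0.0, 10.0))  # -> 0.25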
Example #7
    def _update_filter(self):
        print('update_filter:',self.select_file.value,self.ds.isel(time_slice=self.time_slice_deep_slider.value).amplitude.values.shape,self.filter_.shape)
        if self.selection.selection_expr is not None:
            hvds = hv.Dataset(
                (
                    np.linspace(-0.5, 0.5, self.filter_.shape[1]),
                    np.linspace(-0.5, 0.5, self.filter_.shape[0]),
                    np.zeros(self.filter_.shape),
                ),
                ["x", "y"],
                "val",
            )
            hvds = hv.Dataset(hvds.dframe())
            hvds.data["val"].loc[
                hvds.select(self.selection.selection_expr).data.index
            ] = 1
            data = hvds["val"].reshape(self.filter_.shape).copy().T[::-1]

            gauss_kernel = utils.scipy_gaussian_2D(int(self.filter_.shape[1]/40))
            filter00 = signal.fftconvolve(data, gauss_kernel, mode="same")
            filter00 = utils.normalise(filter00)

            self.filter_ = self.filter_ + filter00

        filter_ = hv.Image(self.filter_, group="filter")
        return filter_
Example #8
    def run(self, hive, drone, target):
        """Runs the controller.
        
        Arguments:
            hive {Hivemind} -- The hivemind.
            drone {Drone} -- Drone being controlled.
            target {np.ndarray} -- World coordinates of where we want to hit the ball.
        """
        # Calculate drone's distance to ball.
        distance = np.linalg.norm(hive.ball.pos - drone.pos)

        # Find directions based on where we want to hit the ball.
        direction_to_hit = normalise(target - hive.ball.pos)
        perpendicular_to_hit = np.cross(direction_to_hit, a3l([0, 0, 1]))

        # Calculating component lengths and multiplying with direction.
        perpendicular_component = perpendicular_to_hit * cap(
            np.dot(perpendicular_to_hit, hive.ball.pos),
            -distance * self.PERP_DIST_COEFF, distance * self.PERP_DIST_COEFF)
        in_direction_component = -direction_to_hit * distance * self.DIRECT_DIST_COEFF

        # Combine components to get a drive target.
        drive_target = hive.ball.pos + in_direction_component + perpendicular_component

        super().run(hive, drone, drive_target)
Example #9
    def get_starting_pos(self):
        p = utils.Position()
        p.x = 0.0
        p.y = 0.0
        p.z = -50.0
        p.speed = 1400.0
        return utils.normalise(p, self.parameters)
Example #10
    def run(self, hive, drone, target):
        """Runs the controller.
        
        Arguments:
            hive {Hivemind} -- The hivemind.
            drone {Drone} -- Drone being controlled.
            target {np.ndarray} -- World coordinates of where to dodge towards.
        """
        # Calculates local target and direction.
        local_target = local(drone.orient_m, drone.pos, target)
        direction = normalise(local_target)

        # First jump
        if self.timer <= self.FST_JUMP_DURATION:
            drone.ctrl.jump = True

        # Second jump, i.e. dodge.
        if self.timer >= self.FST_JUMP_DURATION + self.SND_JUMP_DELAY:
            drone.ctrl.jump = True
            drone.ctrl.pitch = -direction[0]
            drone.ctrl.yaw = direction[1]

        # Expiration of the controller.
        if self.timer >= self.FST_JUMP_DURATION + self.SND_JUMP_DELAY + self.SND_JUMP_DURATION:
            drone.controller = None

        super().run(hive)
Example #11
    def run(self, agent, player, target):
        """Runs the controller.
        
        Arguments:
            agent {BaseAgent} -- The agent.
            player {Car} -- Car object for which to generate controls.
            target {np.ndarray} -- World coordinates of where we want to hit the ball.
        """
        # Calculate drone's distance to ball.
        distance = np.linalg.norm(agent.ball.pos - agent.pos)

        # Find directions based on where we want to hit the ball.
        direction_to_hit = normalise(target - agent.ball.pos)
        perpendicular_to_hit = np.cross(direction_to_hit, a3l([0, 0, 1]))

        # Calculating component lengths and multiplying with direction.
        perpendicular_component = perpendicular_to_hit * cap(
            np.dot(perpendicular_to_hit, agent.ball.pos),
            -distance * self.PERP_DIST_COEFF, distance * self.PERP_DIST_COEFF)
        in_direction_component = -direction_to_hit * distance * self.DIRECT_DIST_COEFF

        # Combine components to get a drive target.
        drive_target = agent.ball.pos + in_direction_component + perpendicular_component

        super().run(agent, player, drive_target)
Example #12
    def run(self, agent, player, target):
        """Runs the controller.
        
        Arguments:
            agent {BaseAgent} -- The agent.
            player {Car} -- Car object for which to generate controls.
            target {np.ndarray} -- World coordinates of where to dodge towards.
        """
        # Calculates local target and direction.
        local_target = local(player.orient_m, player.pos, target)
        direction = normalise(local_target)

        # First jump
        if self.timer <= self.FST_JUMP_DURATION:
            agent.ctrl.jump = True

        # Second jump, i.e. dodge.
        if self.timer >= self.FST_JUMP_DURATION + self.SND_JUMP_DELAY:
            agent.ctrl.jump = True
            agent.ctrl.pitch = -direction[0]
            agent.ctrl.yaw = direction[1]

        # Expiration of the controller.
        if self.timer >= self.FST_JUMP_DURATION + self.SND_JUMP_DELAY + self.SND_JUMP_DURATION:
            agent.controller = None

        super().run(agent)
Example #13
	def _corrupt(self, data, corruption):
		
		if type(corruption) == float:
			cdata = np.random.binomial(size=data.shape, n=1, p=1.-corruption) * data
		elif np.shape(np.asarray(corruption).T) == np.shape(data):
			cdata = corruption.T
		else:
			if self.layers[0].data_std is not None and self.layers[0].data_norm is not None:
				scales = np.random.uniform(low=corruption[0], high=corruption[1], size=data.shape[1])
				
				data = u.unnormalise(data, self.layers[0].data_norm[0], self.layers[0].data_norm[1])
				data = u.unstandardize(data, self.layers[0].data_std[0], self.layers[0].data_std[1])
				
				p = np.random.binomial
				noise_maps = [np.random.normal(scale=sig, size=data.shape[0]) for sig in scales] #* p(1, 0.5) 
				noise_maps = np.asarray(noise_maps)
				cdata = data + noise_maps.T
				
				cdata, _, _ = u.standardize(cdata, self.layers[0].data_std[0], self.layers[0].data_std[1])
				cdata, _, _ = u.normalise(cdata, self.layers[0].data_norm[0], self.layers[0].data_norm[1])
				
				# Just making sure we're not out of bounds:
				min_thr = 1e-6
				max_thr = 0.99999
				
				#if ((cdata < min_thr).sum() > 0 or (cdata > max_thr).sum() > 0) and False:
				#	print np.amin(data), np.amax(data), np.mean(data), np.std(data)
				#	print 'N/C:', (cdata < min_thr).sum(), (cdata > max_thr).sum()
				#	print np.amin(cdata), np.amax(cdata), np.mean(cdata), np.std(cdata)
				#	print 
				cdata[cdata < min_thr] = min_thr
				cdata[cdata > max_thr] = max_thr
				
		return cdata
Example #14
def prepare_sentence(str_words, word_to_id, char_to_id, lower, zeros):
    """
    Prepare a sentence for evaluation.
    """
    words = [
        word_to_id[normalise(w, lower, zeros)
                   if normalise(w, lower, zeros) in word_to_id
                   else '<UNK>']
        for w in str_words
    ]
    chars = [[char_to_id[c] for c in w if c in char_to_id] for w in str_words]
    caps = [cap_feature(w) for w in str_words]
    pos_tags = pos_feature(str_words)
    return {
        'str_words': str_words,
        'words': words,
        'chars': chars,
        'caps': caps,
        'pos_tags': pos_tags,
    }
Example #15
    def assess_states(self,
                      saved_path,
                      savestring='example',
                      pdf_savepath='../',
                      make_pdfs=True):

        self.savestring = savestring
        self.pdf_savepath = pdf_savepath

        self.dataobj = pickle.load(open(saved_path, 'rb'))

        self.norm_data = utils.normalise(self.dataobj.data_array)
        self.norm_data = utils.filterArray(
            self.norm_data,
            window_size=self.sg_filter_window_size,
            order=self.sg_filter_window_order)

        feature_obj = FeatureExtractor(self.norm_data)

        i_features = self.classifier.imputer.transform(
            feature_obj.feature_array)
        iss_features = self.classifier.std_scaler.transform(i_features)
        lda_iss_features = self.lda.transform(iss_features)

        # predict probability and also the actual state
        self.pred_table = self.r_forest_lda.predict_proba(
            lda_iss_features) * 100
        self.preds = self.r_forest_lda.predict(lda_iss_features)

        # Make stuff for the excel sheet
        self.predslist = list(self.preds)  # why need this?
        self.predslist[self.predslist == 4] = 'Baseline'
        self.max_preds = np.max(self.pred_table, axis=1)
        self.threshold_for_mixed = np.where(
            self.max_preds < int(self.threshold), 1, 0)  # 1 when below

        # do the 1st vs 2nd most likely states
        self.sorted_pred = np.sort(self.pred_table, axis=1)
        self.ratio = np.divide(self.sorted_pred[:, 2], self.sorted_pred[:, 3])
        self.threshold_for_ratio = np.where(self.ratio > 0.5, 1,
                                            0)  # 1 when below

        # combine the two measures
        self.combined_pass = np.logical_or(self.threshold_for_mixed,
                                           self.threshold_for_ratio)

        self._string_fun2()
        self._write_to_excel()
        if make_pdfs:
            plot_traces(
                self.norm_data,
                self.preds,
                savepath=self.pdf_savepath + self.savestring,
                #savestring = '/Volumes/LACIE SHARE/VM_data/All_Data_Jan_2016/pdfs0302/'+self.savestring,
                prob_thresholds=self.combined_pass)
Example #16
def word_mapping(sentences, lower, zeros):
    """
    Create a dictionary and a mapping of words, sorted by frequency.
    """
    words = [[normalise(x[0], lower, zeros) for x in s] for s in sentences]
    dico = create_dico(words)
    dico['<UNK>'] = 10000000
    word_to_id, id_to_word = create_mapping(dico)
    print "Found %i unique words (%i in total)" % (len(dico),
                                                   sum(len(x) for x in words))
    return dico, word_to_id, id_to_word
Example #17
def test_path(detail):
    # Path definition.
    a = a3l([3072, -4096, 0])
    b = a3l([3072, 2300, 0])
    c = a3l([1072, 2300, 0])

    part1 = straight(a, b, detail)
    part2 = arc(c, 2000, 0, 3 * np.pi / 4, detail)

    d = part2[-1]
    e = d + 1500 * normalise(part2[-1] - part2[-2])
    f = a3l([0, 1024, 0])
    g = a3l([0, 0, 0])

    part3 = bezier_cubic(d, e, f, g, detail)

    h = a3l([-512, 0, 0])

    part4 = arc(h, 512, 0, -np.pi, detail)

    i = part4[-1]
    j = i + 1500 * normalise(part4[-1] - part4[-2])
    k = a3l([-2800, 1200, 0])
    l = a3l([-3500, 500, 0])

    part5 = bezier_cubic(i, j, k, l, detail)

    m = 2 * l - k
    n = a3l([-3072, -1200, 0])
    o = a3l([-3072, -2000, 0])
    p = a3l([-3072, -4096, 0])

    part6 = bezier_cubic(l, m, n, o, detail)
    part7 = straight(o, p, detail)

    # Connect all the parts.
    path = np.concatenate((part1, part2, part3, part4, part5, part6, part7))

    return path
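The controller and path examples (Examples #8, #10, #11, #12 and #17) use normalise() as a unit-vector helper on numpy arrays. A hedged sketch of that behaviour, returning zero vectors unchanged to avoid division by zero:

import numpy as np

def normalise(v):
    # Scale a vector to unit length; leave zero vectors untouched.
    norm = np.linalg.norm(v)
    return v if norm == 0 else v / norm

print(normalise(np.array([3.0, 4.0, 0.0])))  # -> [0.6 0.8 0. ]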
Example #18
def main():

    """
        Load data, find the optimal K value on a subsample,
        use this value of K to evaluate on a larger subsample of the data,
        printing updated statistics along the way, before returning mean
        Jaccard index across the data subsample
    """

    data_path = os.getcwd() + "/data/JPEGImages/480p/"
    anno_path = os.getcwd() + "/data/Annotations/480p/"

    imgs,masks = generate_dataset_unsupervised(data_path, anno_path,hsv=False)

    # in case data loading has ordering: shuffle images and masks together
    # so each image keeps its corresponding mask
    paired = list(zip(imgs, masks))
    shuffle(paired)
    imgs, masks = map(list, zip(*paired))

    K=FindOptimalK(imgs,masks,10)

    print("Optimal value of K is " + str(K))

    j_scores = []
    print("There are " + str(len(imgs)) + " images in the dataset.")

    for i in range(340):
        img = imgs[i]
        mask = masks[i]

        features = normalise(xyrgb(img).T)

        labels = RunKMeans(features, mask.shape, K)

        best_label = FindForegroundCluster(labels, mask, K)

        binary_fore = np.where(labels == best_label, 1, 0)
        binary_mask = np.where(mask >0 , 1 , 0)

        j = jaccard_index(binary_fore,binary_mask)

        print("This runs jscore " + str(j))

        j_scores.append(j)

        print("Running mean j-score " + str(np.mean(j_scores)))


    print("The mean Jaccard index across the data was " + str(np.mean(j_scores)))

    return j_scores
Example #19
def get_locations(article):
    #find entities
    #entities are in article['summary']

    article['title'] = normalise(article['title'])
    article['summary'] = normalise(article['summary'])

    raw = parser(article['title'] + " " + article['summary'])
    merge_ents(raw)
    entities = get_ents(raw)
    #locate entities
    #places is my entity databases
    entity_list = []
    for ent in entities:
        if ent.lower() not in ["mister", "mr.", "mr"]:
            locations = get_location(ent)
            entity_list.append({"entity": ent, "locations": locations})
    #add entities to article
    article['entities'] = entity_list
    article['added_by'] = "system"

    #return article
    return article
Example #20
    def assess_states(self,
                      raw_path=None,
                      downsample_rate=None,
                      savestring='example',
                      threshold=65,
                      raw_load=True,
                      saved_path=None,
                      make_pdfs=True):

        self.threshold = '65'  # 'sureity' threshold
        self.savestring = savestring
        if raw_load:
            self.dataobj = SeizureData(raw_path, fs_dict=self.fs_dict)
            self.dataobj.load_data()
            f = open('../' + savestring + '_saved', 'wb')
            pickle.dump(self.dataobj, f)

        else:
            assert saved_path != None
            self.dataobj = pickle.load(open(saved_path, 'rb'))
        #print 'printing filename_list'
        #print self.dataobj.filename_list

        self.norm_data = utils.normalise(self.dataobj.data_array)
        feature_obj = FeatureExtractor(self.norm_data)
        i_features = self.classifier.imputer.transform(
            feature_obj.feature_array)
        iss_features = self.classifier.std_scaler.transform(i_features)
        lda_iss_features = self.lda.transform(iss_features)

        np.set_printoptions(precision=3, suppress=True)

        #self.pred_table = self.r_forest.predict_proba(iss_features)*100
        #self.preds = self.r_forest.predict(iss_features)

        self.pred_table = self.r_forest_lda.predict_proba(
            lda_iss_features) * 100
        self.preds = self.r_forest_lda.predict(lda_iss_features)

        self.predslist = list(self.preds)  # why need this?
        self.predslist[self.predslist == 4] = 'Baseline'
        self.max_preds = np.max(self.pred_table, axis=1)
        #print pred_table
        self.threshold_for_mixed = np.where(
            self.max_preds < int(self.threshold), 1, 0)  # 1 when below
        self._string_fun2()
        self._write_to_excel()
        if make_pdfs:
            self.plot_pdfs()
Example #21
def readDM():
    print "Loading dm space..."
    dm_dict = {}
    with open("../DS/ukwac.predict.dm") as f:
        dmlines = f.readlines()
        f.close()

    #Make dictionary with key=row, value=vector
    for l in dmlines:
        items = l.rstrip('\n').split('\t')
        row = items[0]
        vec = [float(i) for i in items[1:]]
        dm_dict[row] = normalise(vec)
    print "Space loaded..."
    return dm_dict
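readDM passes a plain Python list of floats to normalise(), so here the helper is presumably an L2 normalisation over a list rather than a numpy array. A small sketch under that assumption:

import math

def normalise(vec):
    # Divide each component by the Euclidean length; an all-zero
    # vector is returned unchanged.
    length = math.sqrt(sum(x * x for x in vec))
    return [x / length for x in vec] if length else list(vec)

print(normalise([3.0, 4.0]))  # -> [0.6, 0.8]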
Example #22
def draw_healthbar(value,
                   max_value,
                   topleft,
                   size,
                   surface,
                   c1=(255, 0, 0, 255),
                   c2=(0, 255, 0, 255),
                   min_value=0):
    factor = utils.normalise(value, min_value, max_value)

    pygame.draw.rect(surface, c1, pygame.Rect(topleft, size))
    if int(size[0] * factor) != 0:
        pygame.draw.rect(
            surface, c2, pygame.Rect(topleft,
                                     (int(size[0] * factor), size[1])))
Example #23
    def assess_states(self, raw_path = None, downsample_rate = None, savestring = 'example',
                      threshold = 65,
                      raw_load = True,
                      saved_path = None,
                      make_pdfs = True):

        self.threshold = '65' # 'sureity' threshold
        self.savestring = savestring
        if raw_load:
            self.dataobj = SeizureData(raw_path, fs_dict = self.fs_dict)
            self.dataobj.load_data()
            f = open('../'+savestring+'_saved','wb')
            pickle.dump(self.dataobj,f)

        else:
            assert saved_path != None
            self.dataobj = pickle.load(open(saved_path,'rb'))
        #print 'printing filename_list'
        #print self.dataobj.filename_list

        self.norm_data = utils.normalise(self.dataobj.data_array)
        feature_obj = FeatureExtractor(self.norm_data)
        i_features = self.classifier.imputer.transform(feature_obj.feature_array)
        iss_features = self.classifier.std_scaler.transform(i_features)
        lda_iss_features = self.lda.transform(iss_features)

        np.set_printoptions(precision=3, suppress = True)

        #self.pred_table = self.r_forest.predict_proba(iss_features)*100
        #self.preds = self.r_forest.predict(iss_features)

        self.pred_table = self.r_forest_lda.predict_proba(lda_iss_features)*100
        self.preds = self.r_forest_lda.predict(lda_iss_features)

        self.predslist = list(self.preds) # why need this?
        self.predslist[self.predslist == 4] = 'Baseline'
        self.max_preds = np.max(self.pred_table, axis = 1)
        #print pred_table
        self.threshold_for_mixed = np.where(self.max_preds < int(self.threshold),1,0) # 1 when below
        self._string_fun2()
        self._write_to_excel()
        if make_pdfs:
            self.plot_pdfs()
Example #24
def process_two_images(model, imgs, ctx=None):
    """
    Process two images into one flow image
    Args:
        model: The model to use
        imgs: a list of 2 images
        ctx: the model ctx

    Returns:

    """
    if len(imgs) != 2:
        return None
    if isinstance(imgs[0], str):
        if os.path.exists(imgs[0]):
            imgs[0] = cv2.cvtColor(cv2.imread(imgs[0]), cv2.COLOR_BGR2RGB)
        else:
            return None
    if isinstance(imgs[1], str):
        if os.path.exists(imgs[1]):
            imgs[1] = cv2.cvtColor(cv2.imread(imgs[1]), cv2.COLOR_BGR2RGB)
        else:
            return None

    imgs = crop(imgs)
    imgs = np.array(imgs)
    imgs = np.moveaxis(imgs, -1, 1)
    imgs = normalise(imgs)

    imgs = mx.nd.array(imgs, ctx=ctx)
    imgs = mx.nd.expand_dims(imgs, 0)  # add batch axis

    flow = model(imgs)  # run the model

    flow = flow.asnumpy()
    flow = flow.squeeze()
    flow = flow.transpose(1, 2, 0)
    img = flow_to_image(flow)
    img = imresize(
        img, 4.0
    )  # doing the bilinear interpolation on the img, NOT flow cause was too hard :'(

    return img, flow
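process_two_images calls normalise(imgs) on a stack of RGB frames before feeding them to the flow model. What that helper does depends on how the model was trained; a purely hypothetical sketch that rescales 8-bit images to [0, 1]:

import numpy as np

def normalise(imgs):
    # Hypothetical preprocessing: convert uint8 image batches to float32 in [0, 1].
    return np.asarray(imgs, dtype=np.float32) / 255.0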
Example #25
	def _corrupt(self, data):
		
		if type(self.corruption) == float:
			cdata = np.random.binomial(size=data.shape, n=1, p=1.-self.corruption) * data
		elif np.shape(np.asarray(self.corruption).T) == np.shape(data):
			cdata = self.corruption.T
		else:
			
			if self.data_std is not None and self.data_norm is not None:
				scales = np.random.uniform(low=self.corruption[0], high=self.corruption[1], size=data.shape[1])
				
				data = u.unnormalise(data, self.data_norm[0], self.data_norm[1])
				data = u.unstandardize(data, self.data_std[0], self.data_std[1])
				
				p = np.random.binomial
				noise_maps = [np.random.normal(scale=sig, size=data.shape[0]) for sig in scales] # * p(1, 0.5)
				noise_maps = np.asarray(noise_maps)
				
				cdata = data + noise_maps.T
				
				cdata, _, _ = u.standardize(cdata, self.data_std[0], self.data_std[1])
				cdata, _, _ = u.normalise(cdata, self.data_norm[0], self.data_norm[1])
				
				# Just making sure we're not out of bounds:
				min_thr = 1e-6
				max_thr = 0.99999
				
				#if ((cdata < min_thr).sum() > 0 or (cdata > max_thr).sum() > 0) and False:
				#	print np.amin(data), np.amax(data), np.mean(data), np.std(data)
				#	print 'N/C:', (cdata < min_thr).sum(), (cdata > max_thr).sum()
				cdata[cdata < min_thr] = min_thr
				cdata[cdata > max_thr] = max_thr
				
				#print np.amin(cdata), np.amax(cdata), np.mean(cdata), np.std(cdata)
			else:
				raise RuntimeError("Can't normalise the data (%s, %s). You must provide the normalisation and standardisation values. Giving up." % (self.data_std, self.data_norm))
		#print np.amin(data), np.amax(data)
		#print np.amin(cdata), np.amax(cdata)
		return cdata
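Both _corrupt examples round-trip their data through u.unnormalise/u.unstandardize and back through u.standardize/u.normalise, unpacking three return values from each forward call. A sketch of a normalise/unnormalise pair consistent with that calling convention (the real module may differ):

import numpy as np

def normalise(data, low=None, high=None):
    # Min-max scale data to [0, 1] and return the bounds used,
    # so the transformation can be inverted later.
    low = np.min(data) if low is None else low
    high = np.max(data) if high is None else high
    return (data - low) / (high - low), low, high

def unnormalise(data, low, high):
    # Map data in [0, 1] back to its original range.
    return data * (high - low) + low

scaled, low, high = normalise(np.array([1.0, 2.0, 4.0]))
print(unnormalise(scaled, low, high))  # -> [1. 2. 4.]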
Example #26
def FindOptimalK(imgs,masks, upper=10):

    """
        Find K value with best mean J score on
        first 50 training examples, to be used for further experimentation
    """

    out = 0
    outmax = 0.0

    for k in range(2,upper):
        print(k)
        j_scores = []

        for i in range(25):
            img = imgs[i]
            mask = masks[i]

            features = normalise(xyrgb(img).T)

            labels = RunKMeans(features, mask.shape, k)

            best_label = FindForegroundCluster(labels, mask, k)

            binary_fore = np.where(labels == best_label, 1, 0)
            binary_mask = np.where(mask >0 , 1 , 0)

            j = jaccard_index(binary_fore,binary_mask)
            j_scores.append(j)

        print(np.mean(j_scores))

        if np.mean(j_scores) > outmax:
            out =k
            outmax = np.mean(j_scores)

    return out
Example #27
    def k(self, new):
        self.k_history.append(normalise(new))
        self._k = normalise(new)
Example #28
for context in sorted(chars, key=chars.get, reverse=True):
    ppmi = chars[context]

    i = 0
    context_vector = np.zeros(num_dims)
    #print("Reweighting vector with context",context)
    for col in background_cols:
        if context in background_space and col in background_space:  #in case core space does not include context (e.g. bnc.2000 does not include 'rat')
            context_vector[i] = pow(
                utils.cosine_similarity(background_space[context],
                                        background_space[col]), context_weight)
            if math.isnan(context_vector[i]):
                context_vector[i] = 0.0
        i += 1

    context_vector = utils.normalise(context_vector)
    reweighted_vectors.append(background_space[target] * context_vector)
    c += 1

    if c > num_chars:
        break
'''Add character to space'''
#print("Computing vector for",character)
background_space[character] = sum(reweighted_vectors)

new_chars = {}
for i in range(len(background_space[character])):
    new_chars[background_cols[i]] = background_space[character][i]
'''Print top contexts for character'''
c = 1
top_contexts = ""
Example #29
    def build(self,
              dropout,
              char_dim,
              char_lstm_dim,
              char_bidirect,
              word_dim,
              word_lstm_dim,
              word_bidirect,
              lr_method,
              pre_emb,
              crf,
              cap_dim,
              training=True,
              **kwargs
              ):
        """
        Build the network.
        """
        # Training parameters
        n_words = len(self.id_to_word)
        n_chars = len(self.id_to_char)
        n_tags = len(self.id_to_tag)

        # Number of capitalization features
        if cap_dim:
            n_cap = 4

        # Network variables
        is_train = T.iscalar('is_train')
        word_ids = T.ivector(name='word_ids')
        char_for_ids = T.imatrix(name='char_for_ids')
        char_rev_ids = T.imatrix(name='char_rev_ids')
        char_pos_ids = T.ivector(name='char_pos_ids')
        tag_ids = T.ivector(name='tag_ids')
        if cap_dim:
            cap_ids = T.ivector(name='cap_ids')

        # Sentence length
        s_len = (word_ids if word_dim else char_pos_ids).shape[0]

        # Final input (all word features)
        input_dim = 0
        inputs = []

        #
        # Word inputs
        #
        if word_dim:
            input_dim += word_dim
            word_layer = EmbeddingLayer(n_words, word_dim, name='word_layer')
            word_input = word_layer.link(word_ids)
            inputs.append(word_input)
            # Initialize with pretrained embeddings
            if pre_emb and training:
                new_weights = word_layer.embeddings.get_value()
                print 'Loading pretrained embeddings from %s...' % pre_emb
                pretrained = {}
                emb_invalid = 0
                for i, line in enumerate(open(pre_emb, 'r')):
                    line = line.decode('utf8', 'ignore')
                    line = line.rstrip().split()
                    if len(line) == word_dim + 1:
                        pretrained[line[0]] = np.array(
                            [float(x) for x in line[1:]]
                        ).astype(np.float32)
                    else:
                        emb_invalid += 1
                if emb_invalid > 0:
                    print 'WARNING: %i invalid lines' % emb_invalid
                c_found = 0
                c_normal = 0
                # Lookup table initialization
                for i in xrange(n_words):
                    word = self.id_to_word[i]
                    if word in pretrained:
                        new_weights[i] = pretrained[word]
                        c_found += 1
                    elif normalise(word, True, True) in pretrained:
                        new_weights[i] = pretrained[normalise(word, True, True)]
                        c_normal += 1
                    else:
                        print word
                word_layer.embeddings.set_value(new_weights)
                print 'Loaded %i pretrained embeddings.' % len(pretrained)
                print ('%i / %i (%.4f%%) words have been initialized with '
                       'pretrained embeddings.') % (
                            c_found + c_normal, n_words,
                            100. * (c_found + c_normal) / n_words
                      )
                print ('%i found directly, %i after normalising,') % (
                          c_found, c_normal
                      )

        #
        # Chars inputs
        #
        if char_dim:
            input_dim += char_lstm_dim
            char_layer = EmbeddingLayer(n_chars, char_dim, name='char_layer')

            char_lstm_for = LSTM(char_dim, char_lstm_dim, with_batch=True,
                                 name='char_lstm_for')
            char_lstm_rev = LSTM(char_dim, char_lstm_dim, with_batch=True,
                                 name='char_lstm_rev')

            char_lstm_for.link(char_layer.link(char_for_ids))
            char_lstm_rev.link(char_layer.link(char_rev_ids))

            char_for_output = char_lstm_for.h.dimshuffle((1, 0, 2))[
                T.arange(s_len), char_pos_ids
            ]
            char_rev_output = char_lstm_rev.h.dimshuffle((1, 0, 2))[
                T.arange(s_len), char_pos_ids
            ]

            inputs.append(char_for_output)
            if char_bidirect:
                inputs.append(char_rev_output)
                input_dim += char_lstm_dim

        #
        # Capitalization feature
        #
        if cap_dim:
            input_dim += cap_dim
            cap_layer = EmbeddingLayer(n_cap, cap_dim, name='cap_layer')
            inputs.append(cap_layer.link(cap_ids))

        # Prepare final input
        if len(inputs) != 1:
            inputs = T.concatenate(inputs, axis=1)

        #
        # Dropout on final input
        #
        if dropout:
            dropout_layer = DropoutLayer(p=dropout)
            input_train = dropout_layer.link(inputs)
            input_test = (1 - dropout) * inputs
            inputs = T.switch(T.neq(is_train, 0), input_train, input_test)

        # LSTM for words
        word_lstm_for = LSTM(input_dim, word_lstm_dim, with_batch=False,
                             name='word_lstm_for')
        word_lstm_rev = LSTM(input_dim, word_lstm_dim, with_batch=False,
                             name='word_lstm_rev')
        word_lstm_for.link(inputs)
        word_lstm_rev.link(inputs[::-1, :])
        word_for_output = word_lstm_for.h
        word_rev_output = word_lstm_rev.h[::-1, :]
        if word_bidirect:
            final_output = T.concatenate(
                [word_for_output, word_rev_output],
                axis=1
            )
            tanh_layer = HiddenLayer(2 * word_lstm_dim, word_lstm_dim,
                                     name='tanh_layer', activation='tanh')
            final_output = tanh_layer.link(final_output)
        else:
            final_output = word_for_output

        # Sentence to Named Entity tags - Score
        final_layer = HiddenLayer(word_lstm_dim, n_tags, name='final_layer',
                                  activation=(None if crf else 'softmax'))
        tags_scores = final_layer.link(final_output)

        # No CRF
        if not crf:
            cost = T.nnet.categorical_crossentropy(tags_scores, tag_ids).mean()
        # CRF
        else:
            transitions = shared((n_tags + 2, n_tags + 2), 'transitions')

            small = -1000
            b_s = np.array([[small] * n_tags + [0, small]]).astype(np.float32)
            e_s = np.array([[small] * n_tags + [small, 0]]).astype(np.float32)
            observations = T.concatenate(
                [tags_scores, small * T.ones((s_len, 2))],
                axis=1
            )
            observations = T.concatenate(
                [b_s, observations, e_s],
                axis=0
            )

            # Score from tags
            real_path_score = tags_scores[T.arange(s_len), tag_ids].sum()

            # Score from transitions
            b_id = theano.shared(value=np.array([n_tags], dtype=np.int32))
            e_id = theano.shared(value=np.array([n_tags + 1], dtype=np.int32))
            padded_tags_ids = T.concatenate([b_id, tag_ids, e_id], axis=0)
            real_path_score += transitions[
                padded_tags_ids[T.arange(s_len + 1)],
                padded_tags_ids[T.arange(s_len + 1) + 1]
            ].sum()

            all_paths_scores = forward(observations, transitions)
            cost = - (real_path_score - all_paths_scores)

        # Network parameters
        params = []
        if word_dim:
            self.add_component(word_layer)
            params.extend(word_layer.params)
        if char_dim:
            self.add_component(char_layer)
            self.add_component(char_lstm_for)
            params.extend(char_layer.params)
            params.extend(char_lstm_for.params)
            if char_bidirect:
                self.add_component(char_lstm_rev)
                params.extend(char_lstm_rev.params)
        self.add_component(word_lstm_for)
        params.extend(word_lstm_for.params)
        if word_bidirect:
            self.add_component(word_lstm_rev)
            params.extend(word_lstm_rev.params)
        if cap_dim:
            self.add_component(cap_layer)
            params.extend(cap_layer.params)
        self.add_component(final_layer)
        params.extend(final_layer.params)
        if crf:
            self.add_component(transitions)
            params.append(transitions)
        if word_bidirect:
            self.add_component(tanh_layer)
            params.extend(tanh_layer.params)

        # Prepare train and eval inputs
        eval_inputs = []
        if word_dim:
            eval_inputs.append(word_ids)
        if char_dim:
            eval_inputs.append(char_for_ids)
            if char_bidirect:
                eval_inputs.append(char_rev_ids)
            eval_inputs.append(char_pos_ids)
        if cap_dim:
            eval_inputs.append(cap_ids)
        train_inputs = eval_inputs + [tag_ids]

        # Parse optimization method parameters
        if "-" in lr_method:
            lr_method_name = lr_method[:lr_method.find('-')]
            lr_method_parameters = {}
            for x in lr_method[lr_method.find('-') + 1:].split('-'):
                split = x.split('_')
                assert len(split) == 2
                lr_method_parameters[split[0]] = float(split[1])
        else:
            lr_method_name = lr_method
            lr_method_parameters = {}

        # Compile training function
        print 'Compiling...'
        if training:
            updates = Optimization(clip=5.0).get_updates(lr_method_name, cost, params, **lr_method_parameters)
            f_train = theano.function(
                inputs=train_inputs,
                outputs=cost,
                updates=updates,
                givens=({is_train: np.cast['int32'](1)} if dropout else {})
            )
        else:
            f_train = None

        # Compile evaluation function
        if not crf:
            f_eval = theano.function(
                inputs=eval_inputs,
                outputs=tags_scores,
                givens=({is_train: np.cast['int32'](0)} if dropout else {})
            )
        else:
            f_eval = theano.function(
                inputs=eval_inputs,
                outputs=forward(observations, transitions, viterbi=True,
                                return_alpha=False, return_best_sequence=True),
                givens=({is_train: np.cast['int32'](0)} if dropout else {})
            )

        return f_train, f_eval
Example #30
        variables = model.trainable_variables
        gradients = tape.gradient(loss, variables)
        optimizer.apply_gradients(zip(gradients, variables))
        loss_epoch += loss

        # Report progress
        if not (n_batch + 1) % max(1, np.round(total_batches / 40)):
            progress = np.round(((n_batch + 1) / total_batches) * 100)
            avg_loss = loss_epoch / (n_batch + 1)
            print(
                "Epoch {}. Progress {}. Average loss in the epoch until now is {}"
                .format(ep, progress, avg_loss))

    # VALIDATION
    qid_emb = model.ent_rpr(cands_qid).numpy()
    qid_emb = normalise(qid_emb)
    mnt_embed = model.mnt_rpr(dev_mnt).numpy()
    mnt_embed = normalise(mnt_embed)
    scores = np.dot(mnt_embed, qid_emb.T)
    # Check if the right entity is in the TOP-30 (much faster than computing np.argsort)
    k = 30
    ranking = np.argpartition(scores, -k)[:, -k:]
    easy, medium, hard, total = [], [], [], []
    cnt_nones = 0
    for which in range(len(dev_ent)):
        if dev_ent[which] in qid2id:
            is_shortlisted = int(qid2id[dev_ent[which]] in ranking[which])
        else:
            is_shortlisted = 0
            cnt_nones += 1
        total.append(is_shortlisted)
Example #31
if __name__ == "__main__":

    #Parameters for the dataset
    chunk_size = 200

    #Load in the input data
    dirs = extract_file_names(
        "/home/alex/Projects/Unsupervised/kepler_q9_variability/")

    data = extract_data(dirs)

    data = split_to_chunk(data, chunk_size)

    datalist = convert_datalist(data)

    datalist = normalise(datalist)

    data_arr = np.vstack(datalist)

    with open("autoencoder_dataset.pkl", "wb") as f:
        pickle.dump(data_arr, f)
        print("Written ae_dataset.pkl")

    ### Plotting
    #for i in range(0, 100):
    #    print(datalist[i].shape)
    #    plt.figure()
    #    plt.title('Segment %i' % i)
    #    plt.scatter(range(0, chunk_size), datalist[i])
    #    plt.ylabel('Un-normalised flux')
    #    plt.xlabel('Data point index')
Example #32
if c.patch_type == 'random':
    data = np.load(c.dataroot + 'nr_patches_1000_random.npz')
    #data = np.load(c.dataroot + 'nr_patches_500_random1.npz')
else:
    data = np.load(c.dataroot + 'nr_patches_1000_most_err_patch_size_96_nr_pats_41_percent_fp_0_mode_fpfn_part_1.npz')


imgs = data['img']
mask = data['label']

if c.nr_patients<41:
    imgs = imgs[:c.nr_patients*1000]
    mask = mask[:c.nr_patients*1000]

# normalise the input images to range between [-1,1]
imgs_norm = np.array([ut.normalise(i) for i in imgs[:, :, :, 0]])

# convert the images and masks to tensors
tensor_imgs = torch.FloatTensor(imgs_norm)
tensor_mask = torch.FloatTensor(mask[:, :, :, 0])  # removing channel dimension for mask or label as well

# stack them together for the generator as 2 channels
train_pair = torch.stack((tensor_imgs, tensor_mask), 1)

dataset = data_utils.TensorDataset(train_pair)

dataloader = data_utils.DataLoader(dataset, batch_size=c.batch_size,
                                   shuffle=True, num_workers=c.workers)

# Device selection
device = torch.device("cuda:"+str(c.cuda_n[0]) if(torch.cuda.is_available() and
Example #33
def main():
    # parse the command line arguments
    parser = utils.argument_parser()
    args = parser.parse_args()

    print("-------------------------------")
    print("classifier:%s" % args.classifier)
    print("inverter:%s" % args.inverter)
    print("dataset_path:%s" % args.dataset_path)
    print("dataset name:%s" % args.dataset)
    print("results path:%s" % args.results_dir)
    print("inverting from: %s" % args.layer)
    print("-------------------------------")

    # default parameters
    sample_rate = 22050
    frame_len = 1024
    fps = 70
    mel_bands = 80
    mel_min = 27.5
    mel_max = 8000
    blocklen = 115
    batchsize = 32
    start_offset = 10  # secs
    end_offset = 20  # secs

    bin_nyquist = frame_len // 2 + 1
    bin_mel_max = bin_nyquist * 2 * mel_max // sample_rate

    # prepare dataset
    datadir = os.path.join(os.path.dirname(__file__), args.dataset_path,
                           'datasets', args.dataset)

    # load filelist
    with io.open(os.path.join(datadir, 'filelists', 'test')) as f:
        filelist = [l.rstrip() for l in f if l.rstrip()]

    # compute spectra
    print("Computing%s spectra..." %
          (" or loading" if args.cache_spectra else ""))

    spects = [
    ]  # list of tuples, where each tuple has magnitude and phase information for one audio file
    for fn in progress(filelist, 'File '):
        cache_fn = (args.cache_spectra
                    and os.path.join(args.cache_spectra, fn + '.npy'))
        spects.append(
            cached(cache_fn, audio.extract_spect,
                   os.path.join(datadir, 'audio', fn), sample_rate, frame_len,
                   fps))

    # prepare mel filterbank
    filterbank = audio.create_mel_filterbank(sample_rate, frame_len, mel_bands,
                                             mel_min, mel_max)
    filterbank = filterbank[:bin_mel_max].astype(floatX)

    # precompute mel spectra, if needed, otherwise just define a generator
    mel_spects = (np.log(
        np.maximum(np.dot(spect[:, :bin_mel_max], filterbank), 1e-7))
                  for spect in spects)

    # load mean/std or compute it, if not computed yet
    meanstd_file = os.path.join(os.path.dirname(__file__),
                                '%s_meanstd.npz' % args.dataset)
    with np.load(meanstd_file) as f:
        mean = f['mean']
        std = f['std']
    mean = mean.astype(floatX)
    istd = np.reciprocal(std).astype(floatX)

    print("Preparing training data feed...")
    # normalised mel spects, without data augmentation
    mel_spects = [(spect - mean) * istd for spect in mel_spects]

    # we create two theano functions
    # the first one uses pre-trained classifier to generate features and predictions
    # the second one uses pre-trained inverter to generate mel spectrograms from input features

    # classifier (discriminator) model
    input_var = T.tensor3('input')
    inputs = input_var.dimshuffle(
        0, 'x', 1, 2
    )  # insert "channels" dimension, changes a 32 x 115 x 80 input to 32 x 1 x 115 x 80 input which is fed to the CNN

    network = model.architecture(inputs, (None, 1, blocklen, mel_bands))

    # load saved weights
    with np.load(args.classifier) as f:
        lasagne.layers.set_all_param_values(
            network['fc9'], [f['param%d' % i] for i in range(len(f.files))])

    # create output expression
    outputs_score = lasagne.layers.get_output(network[args.layer],
                                              deterministic=True)
    outputs_pred = lasagne.layers.get_output(network['fc9'],
                                             deterministic=True)

    # prepare and compile prediction function
    print("Compiling classifier function...")
    pred_fn_score = theano.function([input_var],
                                    outputs_score,
                                    allow_input_downcast=True)
    pred_fn = theano.function([input_var],
                              outputs_pred,
                              allow_input_downcast=True)

    # inverter (generator) model
    if (args.layer == 'fc8') or (args.layer == 'fc7'):
        input_var_deconv = T.matrix('input_var_deconv')
    else:
        input_var_deconv = T.tensor4('input_var_deconv')

    # inverter (generator) model
    if (args.layer == 'fc8'):
        gen_network = upconv.architecture_upconv_fc8(
            input_var_deconv,
            (batchsize, lasagne.layers.get_output_shape(
                network[args.layer])[1]))
    elif args.layer == 'fc7':
        gen_network = upconv.architecture_upconv_fc7(
            input_var_deconv,
            (batchsize, lasagne.layers.get_output_shape(
                network[args.layer])[1]))
    elif args.layer == 'mp6':
        gen_network = upconv.architecture_upconv_mp6(
            input_var_deconv,
            (batchsize, lasagne.layers.get_output_shape(
                network[args.layer])[1],
             lasagne.layers.get_output_shape(network[args.layer])[2],
             lasagne.layers.get_output_shape(network[args.layer])[3]),
            args.n_conv_layers, args.n_conv_filters)
    elif args.layer == 'conv5':
        gen_network = upconv.architecture_upconv_conv5(
            input_var_deconv,
            (batchsize, lasagne.layers.get_output_shape(
                network[args.layer])[1],
             lasagne.layers.get_output_shape(network[args.layer])[2],
             lasagne.layers.get_output_shape(network[args.layer])[3]),
            args.n_conv_layers, args.n_conv_filters)
    elif args.layer == 'conv4':
        gen_network = upconv.architecture_upconv_conv4(
            input_var_deconv,
            (batchsize, lasagne.layers.get_output_shape(
                network[args.layer])[1],
             lasagne.layers.get_output_shape(network[args.layer])[2],
             lasagne.layers.get_output_shape(network[args.layer])[3]),
            args.n_conv_layers, args.n_conv_filters)
    elif args.layer == 'mp3':
        gen_network = upconv.architecture_upconv_mp3(
            input_var_deconv,
            (batchsize, lasagne.layers.get_output_shape(
                network[args.layer])[1],
             lasagne.layers.get_output_shape(network[args.layer])[2],
             lasagne.layers.get_output_shape(network[args.layer])[3]),
            args.n_conv_layers, args.n_conv_filters)
    elif args.layer == 'conv2':
        gen_network = upconv.architecture_upconv_conv2(
            input_var_deconv,
            (batchsize, lasagne.layers.get_output_shape(
                network[args.layer])[1],
             lasagne.layers.get_output_shape(network[args.layer])[2],
             lasagne.layers.get_output_shape(network[args.layer])[3]),
            args.n_conv_layers, args.n_conv_filters)
    else:
        gen_network = upconv.architecture_upconv_conv1(
            input_var_deconv,
            (batchsize, lasagne.layers.get_output_shape(
                network[args.layer])[1],
             lasagne.layers.get_output_shape(network[args.layer])[2],
             lasagne.layers.get_output_shape(network[args.layer])[3]),
            args.n_conv_layers, args.n_conv_filters)

    # load saved weights
    with np.load(args.inverter) as f:
        lasagne.layers.set_all_param_values(
            gen_network, [f['param%d' % i] for i in range(len(f.files))])

    # create cost expression
    outputs = lasagne.layers.get_output(gen_network, deterministic=True)
    print("Compiling inverter function...")
    test_fn = theano.function([input_var_deconv],
                              outputs,
                              allow_input_downcast=True)

    # instance-based feature inversion
    # (1) pick a file from a dataset (e.g., dataset: Jamendo test) (2) select a time index to read the instance
    file_idx = np.arange(0, len(filelist))
    hop_size = sample_rate / fps  # samples

    for file_instance in file_idx:
        print("<<<<Analysis for the file: %d>>>>" % (file_instance + 1))
        time_idx = np.random.randint(
            start_offset, end_offset, 1
        )[0]  # provides a random integer start position between start and end offsets

        # generate excerpts for the selected file_idx
        # excerpts is a 3-d array of shape: num_excerpts x blocklen x mel_spects_dimensions
        num_excerpts = len(mel_spects[file_instance]) - blocklen + 1
        print("Number of excerpts in the file :%d" % num_excerpts)
        excerpts = np.lib.stride_tricks.as_strided(
            mel_spects[file_instance],
            shape=(num_excerpts, blocklen, mel_spects[file_instance].shape[1]),
            strides=(mel_spects[file_instance].strides[0],
                     mel_spects[file_instance].strides[0],
                     mel_spects[file_instance].strides[1]))

        # convert the time_idx to the excerpt index
        excerpt_idx = int(np.round((time_idx * sample_rate) / (hop_size)))
        print("Time_idx: %f secs, Excerpt_idx: %d" % (time_idx, excerpt_idx))
        if ((excerpt_idx + batchsize) > num_excerpts):
            print(
                "------------------Number of excerpts are less for file: %d--------------------"
                % (file_instance + 1))
            break

        # generating feature representations for the select excerpt.
        # CAUTION: Need to feed mini-batch to pre-trained model, so (mini_batch-1) following excerpts are also fed, but are not analysed
        # classifier can have less than minibatch data, but the inverter needs a batch of data to make prediction (comes from how the inverter was trained)
        scores = pred_fn_score(excerpts[excerpt_idx:excerpt_idx + batchsize])
        #print("Feature"),
        #print(scores[file_idx])

        predictions = pred_fn(excerpts[excerpt_idx:excerpt_idx + batchsize])
        #print("Prediction:%f" %(predictions[0][0]))

        mel_predictions = np.squeeze(
            test_fn(scores), axis=1
        )  # mel_predictions is a 3-d array of shape batch_size x blocklen x n_mels

        # saves plots for the input Mel spectrogram and its inverted representation
        # all plots are normalised in [0, 1] range
        plots.plot_figures(utils.normalise(excerpts[excerpt_idx]),
                           utils.normalise(mel_predictions[0]),
                           predictions[0][0], file_instance, excerpt_idx,
                           args.results_dir, args.layer)
Example #34
    def feature_compose(
            mean_duration: float,
            mean_packet: float,
            mean_num_of_bytes: float,
            # mean_packet_rate: float,
            # mean_byte_rate: float,
            std_duration: float,
            std_packet: float,
            std_num_of_bytes: float,
            # std_packet_rate: float,
            # std_byte_rate: float,
            entropy_protocol: float,
            entropy_dst_ip: float,
            entropy_src_port: float,
            entropy_dst_port: float,
            entropy_flags: float,
            proportion_src_port: list,
            proportion_dst_port: list) -> list:
        """
        Compose the feature array
        :param mean_duration: mean duration
        :param mean_packet: mean packet
        :param mean_num_of_bytes: mean number of bytes
        #:param mean_packet_rate: mean packet rate
        #:param mean_byte_rate: mean byte rate
        :param std_duration: std duration
        :param std_packet: std packet
        :param std_num_of_bytes: std number of bytes
        #:param std_packet_rate: std packet rate
        #:param std_byte_rate: std byte rate
        :param entropy_protocol: entropy of protocol
        :param entropy_dst_ip: entropy of dest ip
        :param entropy_src_port: entropy of src ip
        :param entropy_dst_port: entropy of dest port
        :param entropy_flags: entropy of flags
        :param proportion_src_port: proportion of src common ports
        :param proportion_dst_port: proportion of dest common port
        :type mean_duration: float
        :type mean_packet: float
        :type mean_num_of_bytes: float
        #:type mean_packet_rate: float
        #:type mean_byte_rate: float
        :type std_duration: float
        :type std_packet: float
        :type std_num_of_bytes: float
        #:type std_packet_rate: float
        #:type std_byte_rate: float
        :type entropy_protocol: float
        :type entropy_dst_ip: float
        :type entropy_src_port: float
        :type entropy_dst_port: float
        :type entropy_flags: float
        :type proportion_src_port: list
        :type proportion_dst_port: list
        :return: feature array
        :rtype list
        """
        # normalise
        mean_duration = normalise(mean_duration,
                                  *feature_min_max.get('mean_duration'))
        mean_packet = normalise(mean_packet,
                                *feature_min_max.get('mean_packet'))
        mean_num_of_bytes = normalise(
            mean_num_of_bytes, *feature_min_max.get('mean_num_of_bytes'))
        #mean_packet_rate = normalise(mean_packet_rate, *feature_min_max.get('mean_packet_rate'))
        #mean_byte_rate = normalise(mean_byte_rate, *feature_min_max.get('mean_byte_rate'))
        std_duration = normalise(std_duration,
                                 *feature_min_max.get('std_duration'))
        std_packet = normalise(std_packet, *feature_min_max.get('std_packet'))
        std_num_of_bytes = normalise(std_num_of_bytes,
                                     *feature_min_max.get('std_num_of_bytes'))
        #std_packet_rate = normalise(std_packet_rate, *feature_min_max.get('std_packet_rate'))
        #std_byte_rate = normalise(std_byte_rate, *feature_min_max.get('std_byte_rate'))
        entropy_protocol = normalise(entropy_protocol,
                                     *feature_min_max.get('entropy_protocol'))
        entropy_dst_ip = normalise(entropy_dst_ip,
                                   *feature_min_max.get('entropy_dst_ip'))
        entropy_src_port = normalise(entropy_src_port,
                                     *feature_min_max.get('entropy_src_port'))
        entropy_dst_port = normalise(entropy_dst_port,
                                     *feature_min_max.get('entropy_dst_port'))
        entropy_flags = normalise(entropy_flags,
                                  *feature_min_max.get('entropy_flags'))
        """
        feature_arr = [
            mean_duration, mean_packet, mean_num_of_bytes, mean_packet_rate, mean_byte_rate, std_duration, std_packet,
            std_num_of_bytes, std_packet_rate, std_byte_rate, entropy_protocol, entropy_dst_ip, entropy_src_port,
            entropy_dst_port, entropy_flags,
        ]
        """
        feature_arr = [
            mean_duration,
            mean_packet,
            mean_num_of_bytes,
            std_duration,
            std_packet,
            std_num_of_bytes,
            entropy_protocol,
            entropy_dst_ip,
            entropy_src_port,
            entropy_dst_port,
            entropy_flags,
        ]

        feature_arr.extend(proportion_src_port)
        feature_arr.extend(proportion_dst_port)

        return feature_arr
Example #35
import pickle
import matplotlib.pyplot as plt
import numpy as np

import utils
from network_loader import SeizureData
from relabeling_functions import relabel,reorder
from extrator import FeatureExtractor
from classifier import NetworkClassifer
from make_pdfs import plot_traces

################# Training Data ###################
reload_training = True
if reload_training:
    training_traces = utils.raw_training_load()
    training_traces_norm = utils.normalise(training_traces)
    training_data = FeatureExtractor(training_traces_norm)
    #f = open('../full_raw_training','wb')
    #pickle.dump(training_traces,f)

elif not reload_training:
    print 'skipping raw training load'
    training_traces = pickle.load(open('../full_raw_training','rb'))
    training_traces_norm = utils.normalise(training_traces)
    training_data = FeatureExtractor(training_traces_norm)
    np.savetxt('training_traces.csv',training_traces_norm,delimiter=',')

################# Training Labels and mixed event exclusion ###################
cleanup = np.loadtxt('../Training_cleanup.csv',delimiter=',')
training_labels = np.array([int(x[1]) for x in cleanup])
print training_labels.shape