def evaluate_one_percent(self):
    """ evaluate one percent of all fonts """
    if self.percent >= len(self.df_percents):
        self.got_results = True
        return False
    norm_user_img = utils.normalize(self.user_image)
    fonts = self.df_percents[self.percent]['aws_bucket_key']
    indices = [(self.font_index_map[font]) for font in fonts]
    imgs = self.char_array[indices]
    norm_imgs = np.zeros(imgs.shape)
    for i in range(imgs.shape[0]):
        norm_imgs[i] = utils.normalize(imgs[i])
    norm_imgs.shape = (norm_imgs.shape[0], norm_imgs.shape[1]*norm_imgs.shape[2])
    predictions = self.nn.predict_proba(norm_imgs)
    scores = np.divide(predictions[:,0], np.max(predictions[:,1:], axis=1))
    self.scores.update(dict(zip(fonts, scores)))
    self.percent += 1
    if self.percent % 10 == 0:
        print '{0}% fonts evaluated'.format(self.percent)
    return True
def add_doc_attributes(doc):
    doc_json = proposal_utils.doc_info(doc)
    properties = extract.get_properties(doc_json)
    for name, value in properties.items():
        logger.info("Adding %s attribute", name)
        published = doc.published or datetime.now()
        handle = normalize(name)
        try:
            attr = Attribute.objects.get(proposal=doc.proposal,
                                         handle=handle)
        except Attribute.DoesNotExist:
            attr = Attribute(proposal=doc.proposal,
                             name=name,
                             handle=normalize(name),
                             published=published)
            attr.set_value(value)
        else:
            # TODO: Either mark the old attribute as stale and create a
            # new one or create a record that the value has changed
            if published > attr.published:
                attr.clear_value()
                attr.set_value(value)
        attr.save()
    add_doc_events(doc, properties)
    return doc
def album(self, album):
    title = normalize(album.name)

    # TODO album years
    #if Prefs["displayAlbumYear"] and album.getYear() != 0:
    #    title = "%s (%s)" % (title, album.getYear())

    cover_url = self.image(album.covers)

    track_count = None
    if album.discs:
        track_count = len(album.discs[0].tracks)

    return DirectoryObject(
        key=route_path('album', album.uri),
        #rating_key=album.uri,
        title=title,
        tagline=', '.join([normalize(ar.name) for ar in album.artists]),
        #track_count=track_count,
        art=cover_url,
        thumb=cover_url,
    )
def triangle(i, amp, phase=0):
    phase = normalize(phase, 360)
    i = normalize(i + phase/360.)
    if i < 0.5:
        return amp * 2 * i
    else:
        return amp * 2 * (1 - i)
def square(i, amp, phase=0):
    phase = normalize(phase, 360)
    i = normalize(i + phase/360.)
    if i < 0.5:
        return 0
    else:
        return amp
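# A minimal usage sketch (not from the original source), assuming the project's
# normalize(x, period=1) simply wraps a value into [0, period); a hypothetical
# stand-in is defined here so the sketch is self-contained.
def normalize(x, period=1):
    return x % period

# Sample one cycle of each oscillator at 8 points, amplitude 1, 90-degree phase offset.
samples_tri = [triangle(i / 8.0, 1, phase=90) for i in range(8)]
samples_sq = [square(i / 8.0, 1, phase=90) for i in range(8)]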
def get_left_elbow_yaw(kinect_pos, shoulder_roll=None, shoulder_pitch=None, world=None):
    if world is None:
        world = get_robot_world(kinect_pos)
    if shoulder_roll is None:
        shoulder_roll = get_left_shoulder_roll(kinect_pos, world)
    if shoulder_pitch is None:
        shoulder_pitch = get_left_shoulder_pitch(kinect_pos, world)
    shoulder = kinect_pos[kinecthandler.joints_map[joints.SHOULDER_LEFT]]
    elbow = kinect_pos[kinecthandler.joints_map[joints.ELBOW_LEFT]]
    wrist = kinect_pos[kinecthandler.joints_map[joints.WRIST_LEFT]]
    pitch_matrix = np.matrix([[1, 0, 0],
                              [0, np.cos(shoulder_pitch), -np.sin(shoulder_pitch)],
                              [0, np.sin(shoulder_pitch), np.cos(shoulder_pitch)]])
    roll_matrix = np.matrix([[np.cos(shoulder_roll), 0, np.sin(shoulder_roll)],
                             [0, 1, 0],
                             [-np.sin(shoulder_roll), 0, np.cos(shoulder_roll)]])
    transform = world[0] * pitch_matrix * roll_matrix
    elbow_shoulder = utils.get_vector(shoulder, elbow, transform=transform)
    elbow_shoulder = utils.normalize(elbow_shoulder)
    modified_elbow = [elbow[0], elbow[1] + 2, elbow[2]]
    elbow_vertical = utils.get_vector(modified_elbow, elbow, transform=transform)
    elbow_wrist = utils.get_vector(wrist, elbow, transform=transform)
    elbow_wrist = utils.normalize([elbow_wrist[0], elbow_wrist[1]])
    cross_arm = np.cross(elbow_vertical, elbow_shoulder)
    cross_arm = utils.normalize([cross_arm[0], cross_arm[1]])
    # cross_arm = np.array([cross_arm[0], cross_arm[1]])
    # elbow_wrist = np.array([elbow_wrist[0], elbow_wrist[1]])
    sign = -1
    if elbow_wrist[1] > 0:
        sign = 1
    dot = utils.normalized_dot(elbow_wrist, cross_arm)
    return sign * (np.arccos(dot))
def _tick(self):
    if self.has_rock:
        # Try to drop at base.
        if self._drop_available():
            self.has_rock = False
            self.world.rock_collected()
            return
        # Call for a carrier to pick up.
        self._broadcast_come_message()
        # Head towards base if carriers not available.
        if not self.world.carriers:
            self.dx, self.dy = normalize(self.world.mars_base.x - self.x,
                                         self.world.mars_base.y - self.y)
        else:
            return
    else:
        # Pick up.
        rock = self._rock_available()
        if rock:
            self.has_rock = True
            self.world.remove_entity(rock)
            return
        # Head towards rock.
        rock = self._sense_rock()
        if rock:
            self.dx, self.dy = normalize(rock.x - self.x, rock.y - self.y)
    # Keep walkin'.
    while not self._can_move():
        self.dx, self.dy = self._get_new_direction()
    self._move()
def random_rhyme(self):
    c1 = 'a'
    c2 = 'a'
    while not rhymes_with(c1, c2, self.span):
        c1 = normalize(random.choice(self.words))
        c2 = normalize(random.choice(self.words))
    return (c1, c2)
def generate_smooth_normals(vertices, faces):
    print "generating normals for", vertices.shape[0], "vertices"
    vertex_normals = [[] for _ in xrange(vertices.shape[0])]
    print len(vertex_normals), "vertices"
    normalize = lambda n: n / numpy.sqrt(numpy.sum(n ** 2))
    for i in range(faces.shape[0]):
        face_vertices = faces[i, :]
        v1, v2, v3 = [vertices[face_vertices[j], :] for j in range(3)]
        n = normalize(numpy.cross(v2 - v1, v3 - v1))
        for v in face_vertices:
            vertex_normals[v].append(n)
    normals = numpy.ones(vertices.shape)
    for i in range(normals.shape[0]):
        if len(vertex_normals[i]) == 0:
            print "WARNING: no normal for vertex", i
            continue
        avg_normal = numpy.mean(numpy.vstack(vertex_normals[i]), 0)
        normals[i, :] = normalize(avg_normal)
    return normals
def write_xml(matching, f):
    """
    `matching` contains a list of pairs (tagged_string, its_superstring)
    Tagged superstrings are written to the file `f'
    """
    for (tagged, raw) in matching:
        print >>f, '<aff>'
        #print tagged, raw
        i = 0
        tag_to_write = None
        for c in tagged:
            if not normalize(c):
                continue
            if len(c) > 1 and c[1] == '/':  # closing tag
                f.write(c)
            elif len(c) > 1:  # opening tag
                if tag_to_write:
                    f.write(tag_to_write)
                    tag_to_write = None
                tag_to_write = c
            else:
                while normalize(raw[i]) != normalize(c):
                    f.write(xml_escape(raw[i]))
                    i += 1
                if tag_to_write:
                    f.write(tag_to_write)
                    tag_to_write = None
                f.write(xml_escape(raw[i]))
                i += 1
        f.write(''.join(xml_escape(c) for c in raw[i:]))
        print >>f
        print >>f, '</aff>'
def normalize_dataset(dataset):
    fiveMinuteMean = dataset['fiveMinuteMean']
    trafficVolume = dataset['trafficVolume']
    actualTravelTime = dataset['actualTravelTime']
    dataset['fiveMinuteMean'] = normalize(fiveMinuteMean, min(fiveMinuteMean), max(fiveMinuteMean))
    dataset['trafficVolume'] = normalize(trafficVolume, min(trafficVolume), max(trafficVolume))
    dataset['actualTravelTime'] = normalize(actualTravelTime, min(actualTravelTime), max(actualTravelTime))
def SIM(saliency_map1, saliency_map2):
    '''
    Similarity between two different saliency maps when viewed as distributions
    (SIM=1 means the distributions are identical).
    This similarity measure is also called **histogram intersection**.
    Parameters
    ----------
    saliency_map1 : real-valued matrix
        If the two maps are different in shape, saliency_map1 will be resized to match saliency_map2.
    saliency_map2 : real-valued matrix
    Returns
    -------
    SIM : float, between [0,1]
    '''
    map1 = np.array(saliency_map1, copy=False)
    map2 = np.array(saliency_map2, copy=False)
    if map1.shape != map2.shape:
        map1 = resize(map1, map2.shape, order=3, mode='nearest')  # bi-cubic/nearest is what Matlab imresize() does by default
    # Normalize the two maps to have values between [0,1] and sum up to 1
    map1 = normalize(map1, method='range')
    map2 = normalize(map2, method='range')
    map1 = normalize(map1, method='sum')
    map2 = normalize(map2, method='sum')
    # Compute histogram intersection
    intersection = np.minimum(map1, map2)
    return np.sum(intersection)
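# A minimal call sketch (not from the original source): it assumes SIM and the
# range/sum-based normalize used above are importable, and that the inputs are
# equally sized non-negative NumPy arrays.
import numpy as np

rng = np.random.default_rng(0)
map_a = rng.random((8, 8))
map_b = map_a.copy()

print(SIM(map_a, map_b))                      # identical maps -> 1.0 (up to float error)
print(SIM(map_a, rng.random((8, 8))) <= 1.0)  # histogram intersection is bounded by 1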
def track(self, track, index=None):
    rating_key = track.uri
    if index is not None:
        rating_key = '%s::%s' % (track.uri, index)

    cover_url = self.image(track.album.covers)

    return TrackObject(
        items=[
            MediaObject(
                parts=[PartObject(
                    key=self.client.track_url(track),
                    duration=int(track.duration)
                )],
                duration=int(track.duration),
                container=Container.MP3,
                audio_codec=AudioCodec.MP3
            )
        ],
        key=route_path('metadata', str(track.uri)),
        rating_key=quote(rating_key),
        title=normalize(track.name),
        album=normalize(track.album.name),
        artist=', '.join([normalize(ar.name) for ar in track.artists]),
        index=int(track.number),
        duration=int(track.duration),
        art=cover_url,
        thumb=cover_url
    )
def generate_features(self):
    # prepare variables
    img_lab = rgb2lab(self._img)
    segments = slic(img_lab, n_segments=500, compactness=30.0, convert2lab=False)
    max_segments = segments.max() + 1

    # create x,y feature
    shape = self._img.shape
    a = shape[0]
    b = shape[1]
    x_axis = np.linspace(0, b - 1, num=b)
    y_axis = np.linspace(0, a - 1, num=a)

    x_coordinate = np.tile(x_axis, (a, 1,))  # coordinate table for the x axis
    y_coordinate = np.tile(y_axis, (b, 1,))  # coordinate table for the y axis
    y_coordinate = np.transpose(y_coordinate)

    coordinate_segments_mean = np.zeros((max_segments, 2))

    # create lab feature
    img_l = img_lab[:, :, 0]
    img_a = img_lab[:, :, 1]
    img_b = img_lab[:, :, 2]

    img_segments_mean = np.zeros((max_segments, 3))

    for i in xrange(max_segments):
        segments_i = segments == i

        coordinate_segments_mean[i, 0] = x_coordinate[segments_i].mean()
        coordinate_segments_mean[i, 1] = y_coordinate[segments_i].mean()

        img_segments_mean[i, 0] = img_l[segments_i].mean()
        img_segments_mean[i, 1] = img_a[segments_i].mean()
        img_segments_mean[i, 2] = img_b[segments_i].mean()

    # element distribution
    wc_ij = np.exp(-cdist(img_segments_mean, img_segments_mean) ** 2 / (2 * self._sigma_distribution ** 2))
    wc_ij = wc_ij / wc_ij.sum(axis=1)[:, None]
    mu_i = np.dot(wc_ij, coordinate_segments_mean)
    distribution = np.dot(wc_ij, np.linalg.norm(coordinate_segments_mean - mu_i, axis=1) ** 2)
    distribution = normalize(distribution)
    distribution = np.array([distribution]).T

    # element uniqueness feature
    wp_ij = np.exp(-cdist(coordinate_segments_mean, coordinate_segments_mean) ** 2 / (2 * self._sigma_uniqueness ** 2))
    wp_ij = wp_ij / wp_ij.sum(axis=1)[:, None]
    uniqueness = np.sum(cdist(img_segments_mean, img_segments_mean) ** 2 * wp_ij, axis=1)
    uniqueness = normalize(uniqueness)
    uniqueness = np.array([uniqueness]).T

    # save features and variables
    self.img_lab = img_lab
    self.segments = segments
    self.img_segments_mean = img_segments_mean
    self.coordinate_segments_mean = coordinate_segments_mean
    self.uniqueness = uniqueness
    self.distribution = distribution
def rhymes_with(c1, c2, span):
    w1 = normalize(c1)
    w2 = normalize(c2)
    if len(w1) < 3 or len(w2) < 3:
        return False
    if w1 in w2 or w2 in w1:
        return False
    return get_end(w1, span) == get_end(w2, span)
def __computeNewMask__(self, frame_feat, shape, models):
    assert(shape[0]*shape[1] == frame_feat.__len__())
    (fg_eigt, fg_mean) = models[0]
    fg_score = normalize(np.sum(np.dot(frame_feat - fg_mean, fg_eigt.transpose()), 1))
    (bg_eigt, bg_mean) = models[1]
    bg_score = normalize(np.sum(np.dot(frame_feat - bg_mean, bg_eigt.transpose()), 1))
    frames_mask = (fg_score > bg_score + 0.2).reshape((shape[0], shape[1]))
    return frames_mask
def generate_training_sample(self):
    """ A function to generate the training samples """
    random.seed(42)

    ## normalize the image
    norm_img = utils.normalize(self.user_image)

    ## Get image dimensions
    w, h = norm_img.shape
    assert w == h, 'Character image should be square'

    ## Obtain similar enough random fonts
    random_fonts = []
    n_fonts = self.char_array.shape[0]
    endloop = 0
    n_random = self.n_random
    while len(random_fonts) < n_random:
        rdn_img = self.char_array[random.randint(0, n_fonts)]
        rdn_norm_img = utils.normalize(rdn_img)
        pbp = utils.pixbypix_similarity(rdn_norm_img, norm_img)
        if (pbp < 0.9999):
            random_fonts.append(np.ravel(rdn_norm_img))
        ## Bail out of the loop if not enough similar fonts are found
        if endloop > 20000:
            n_random = len(random_fonts)
            break
        endloop += 1
    print 'Found {0} fonts for the random sample'.format(n_random)

    ## Put together the different types of training samples
    n_signal = n_random
    n_variations = n_signal//4
    variations = []
    variations += utils.scale_variations(norm_img, scale_factors=np.linspace(0.95, 0.99, n_variations))
    variations += utils.skew_variations(norm_img,
                                        vertical_shear=np.linspace(-0.02, 0.02, math.ceil(math.sqrt(n_variations))),
                                        horizontal_shear=np.linspace(-0.02, 0.02, math.ceil(math.sqrt(n_variations))))
    variations += utils.rotate_variations(norm_img, angles=np.linspace(-5, 5, n_variations))
    variations += [norm_img]*n_variations
    signal = [np.ravel(var) for var in variations]
    self.X = np.stack(signal + random_fonts, axis=0)
    self.y = np.array([0]*len(signal) + range(1, n_random+1))
def do_mstep_b(d):
    result = np.zeros([number_of_topics])
    for z in range(number_of_topics):
        s = 0
        for w_index in range(vocabulary_size):
            count = term_doc_matrix[d][w_index]
            s = s + count * topic_prob[d, w_index, z]
        result[z] = s
    normalize(result)
    return result
def do_mstep_a(t):
    result = np.zeros([vocabulary_size])
    for w_index in range(vocabulary_size):
        s = 0
        for d_index in range(number_of_documents):
            count = term_doc_matrix[d_index][w_index]
            s = s + count * topic_prob[d_index, w_index, t]
        result[w_index] = s
    normalize(result)
    return result
def do_estep(d):
    result = np.zeros([vocabulary_size, number_of_topics])
    for w in range(vocabulary_size):
        prob = document_topic_prob[d, :] * topic_word_prob[:, w]
        if sum(prob) == 0.0:
            print 'exit'
        else:
            normalize(prob)
        result[w] = prob
    return result
def eigenBasedFeats(_input):
    assert _input.ndim == 1 or _input.ndim == 3
    if _input.ndim == 3:
        _input = cv2.cvtColor(_input, cv2.COLOR_RGB2GRAY)
    eigen = cv2.cornerEigenValsAndVecs(_input, 15, 3)
    eigen = eigen.reshape(_input.shape[0], _input.shape[1], 3, 2)
    texture_mag = normalize(np.sqrt(eigen[:,:,0,0]**2 + eigen[:,:,0,1]**2))
    texture_dir1 = normalize(np.arctan2(eigen[:,:,1,1], eigen[:,:,1,0]))
    texture_dir2 = normalize(np.arctan2(eigen[:,:,2,1], eigen[:,:,2,0]))
    texture_prop = np.dstack((texture_mag, texture_dir1, texture_dir2))
    return texture_prop
def set_fsb(self, preset):
    if preset not in self._presets.keys():
        return False
    preset_data = self._presets[preset]

    pll_data = self.smbus_read_block(I2cDev.PLL_ADDR, 0)
    pll_data[1] |= (1<<0) | (1<<4) | (1<<6)
    if len(pll_data) < 17:
        log("we didn't get enough pll data!")
        return False

    # M is always 24 for simplicity
    # Adjust N for the new M values
    pll_data[12] = int(24.0/(pll_data[11] & 0x3f) * pll_data[12])
    pll_data[16] = int(24.0/(pll_data[15] & 0x3f) * pll_data[16])
    pll_data[11] = 24
    pll_data[15] = 24

    curr_fsb = pll_data[12]
    curr_pci = pll_data[16]
    target_fsb, target_pci, voltage_flag = preset_data
    if curr_fsb == target_fsb and curr_pci == target_pci:
        return True

    log("adjustment from %d/%d to %d/%d" % (curr_fsb, curr_pci, target_fsb, target_pci))

    # calculate fsb steps
    fsb_dir = self._stepwidth
    pci_dir = self._stepwidth
    # direction
    if curr_fsb > target_fsb:
        fsb_dir = -fsb_dir
    if curr_pci > target_pci:
        pci_dir = -pci_dir
    fsb_steps = range(curr_fsb, target_fsb, fsb_dir)
    pci_steps = range(curr_pci, target_pci, pci_dir)
    # add target
    fsb_steps.append(int(target_fsb))
    pci_steps.append(int(target_pci))
    # normalize lists to the same length
    utils.normalize(fsb_steps, pci_steps)

    log("FSB steps: %s" % fsb_steps)
    log("PCI steps: %s" % pci_steps)

    # apply steps
    # set voltage to high during transition
    utils.ec_gpio_set(utils.EC_VOLTAGE, 1)
    for fsb, pci in zip(fsb_steps, pci_steps):
        log("applying step %d/%d" % (fsb, pci))
        time.sleep(0.05)
        pll_data[12] = fsb
        self.smbus_write_block(I2cDev.PLL_ADDR, 0, pll_data)
        pll_data[16] = pci
        time.sleep(0.05)
        self.smbus_write_block(I2cDev.PLL_ADDR, 0, pll_data)

    # restore voltage flag
    utils.ec_gpio_set(utils.EC_VOLTAGE, voltage_flag)

    self._callback(preset)
    return True
def from_file(words, length):
    g = Graph(set(normalize(w) for w in words), length)
    for i in range(len(words)):
        w = normalize(words[i])
        prev = tuple(normalize(p) for p in words[max(0, i-length):i])
        for j in range(len(prev) + 1):
            if not any(p[-1] == '.' for p in prev[j:]):
                g.add_edge(prev[j:], w)
        #g.add_edge(prev[-1], w)
    g.count_probs()
    for ((u, v), val) in g.edges.items():
        if (len(u) >= 1 and val > 5) or (len(u) >= 2 and val > 1):
            print(u, v, g.probs[u, v], g.degs[u, len(u)], file=sys.stderr)
    return g
def process_extract(self, name, extra):
    native_result = self.extractor.from_native(self.db, name)
    try:
        name = self.pypi.real_name(name)
    except urllib2.HTTPError:
        logging.warning("PyPi error for {}".format(name))
        return
    versions = self.pypi.package_releases(name)
    if not versions and not native_result:
        logging.warn("No versions found for {}".format(name))
        return
    for version in versions:
        data = self.db.get(name, version)
        if data:
            logging.info("Cached {}:{}".format(utils.normalize(name), utils.normalize(version)))
        elif self.is_version_blacklisted(name, version):
            logging.info("Blacklisted {}:{}".format(name, version))
        else:
            try:
                logging.info("Fetching {}:{}".format(utils.normalize(name), utils.normalize(version)))
                data = self.extractor.from_pypi(self.db, name, version)
                # did we get something useful?
                if not data:
                    self.blacklist_version(name, version)
            except Exception as e:
                logging.warn("Unhandled exception while processing {}:{} - {}".format(name, version, e))
                self.blacklist_version(name, version)
        # register
        data = self.db.get(name, version)
        if data:
            self.add_todos_from_db(data['name'], data['version'], extra)
    self.done_with_all_versions(name, extra)
def do_mstep_b(d, vocabulary_size, number_of_topics, term_doc_matrix, topic_prob):
    # number_of_topics = getNumberOfTopics()
    # vocabulary_size = getVocabularySize()
    # term_doc_matrix = getTermDocMatrix()
    # topic_word_prob = getTopicWordProb()
    print 'document %d' % d
    result = np.zeros([number_of_topics])
    for z in range(number_of_topics):
        s = 0
        for w_index in range(vocabulary_size):
            count = term_doc_matrix[d][w_index]
            s = s + count * topic_prob[d, w_index, z]
        result[z] = s
    normalize(result)
    return result
def __init__(self, *args, **kwargs):
    app.Canvas.__init__(self, *args, **kwargs)
    self.program = gloo.Program(self.read_shader('1.vert'), self.read_shader('3.frag'))

    # Fill screen with single quad, fragment shader does all the real work
    self.program["position"] = [(-1, -1), (-1, 1), (1, 1),
                                (-1, -1), (1, 1), (1, -1)]

    self._starttime = time.time()
    self.program['time'] = 0
    self.program['cameraPos'] = (0.0, 3.0, -6.0)
    self.program['cameraLookat'] = (0.0, -0.85, 0.5)
    self.program['lightDir'] = normalize(np.array((-1, -1, -1)))  # needs to be normalized
    self.program['lightColour'] = (1.4, 3.0, 0.3)
    self.program['diffuse'] = (0.27, 0.27, 0.27)
    self.program['ambientFactor'] = 0.45
    self.program['rotateWorld'] = True
    self.program['scale'] = 1.5
    self.program['offset'] = 1.8
    self.program['cubeWidth'] = 1
    self.program['angleA'] = 1
    self.program['angleB'] = 1

    self.apply_zoom()

    gloo.set_clear_color(color='black')
    self._timer = app.Timer('auto', connect=self.update, start=True)
    self.show()
def __of__(self, parent):
    dirpath = self._dirpath
    info = _dirreg.getDirectoryInfo(dirpath)
    if info is None:
        # for DirectoryViews created with CMF versions before 1.5
        # this is basically the old minimalpath() code
        dirpath = normalize(dirpath)
        index = dirpath.rfind('Products')
        if index == -1:
            index = dirpath.rfind('products')
        if index != -1:
            dirpath = dirpath[index+len('products/'):]
        info = _dirreg.getDirectoryInfo(dirpath)
        if info is not None:
            # update the directory view with a corrected path
            self._dirpath = dirpath
        elif self._dirpath:
            warn('DirectoryView %s refers to a non-existing path %s'
                 % (self.id, dirpath), UserWarning)
    if info is None:
        data = {}
        objects = ()
    else:
        data, objects = info.getContents(_dirreg)
    s = DirectoryViewSurrogate(self, data, objects)
    res = s.__of__(parent)
    return res
def fixed_lag_smoothing(e_t, HMM, d, ev, t):
    """[Figure 15.6]
    Smoothing algorithm with a fixed time lag of 'd' steps.
    Online algorithm that outputs the new smoothed estimate if observation
    for new time step is given."""
    ev.insert(0, None)
    T_model = HMM.transition_model
    f = HMM.prior
    B = [[1, 0], [0, 1]]
    evidence = []
    evidence.append(e_t)
    O_t = vector_to_diagonal(HMM.sensor_dist(e_t))
    if t > d:
        f = forward(HMM, f, e_t)
        O_tmd = vector_to_diagonal(HMM.sensor_dist(ev[t - d]))
        B = matrix_multiplication(inverse_matrix(O_tmd), inverse_matrix(T_model), B, T_model, O_t)
    else:
        B = matrix_multiplication(B, T_model, O_t)
    t = t + 1
    if t > d:
        # always returns a 1x2 matrix
        return [normalize(i) for i in matrix_multiplication([f], B)][0]
    else:
        return None
def ps_from_copies(sigma, Ne, L, copies, approx=True):
    #print "ps from copies:", sigma, Ne, L, copies
    if approx:
        mu = approx_mu(G, sigma, L, copies)
    else:
        mu = mu_from(G, sigma, L, copies)
    return normalize([phat(k, sigma, mu, Ne, L) for k in range(L+1)])
def parse(st):
    """
    :param st:
    :return:
    """
    st = normalize(st)
    s = st.split()
    if NUMBERS.has_key(s[0].lower()):
        s[0] = str(NUMBERS[s[0].lower()])
        st = ' '.join(s)
    res = PARSER_RE.match(st)
    parsed = {}
    u = res.group('unit')
    if u:
        for k, v in UNITS.items():
            if u.lower().strip() in v:
                #print "Updating unit: was: {} now: {}".format(u, k)
                parsed['unit'] = k
    if not parsed.has_key('unit'):
        parsed['unit'] = res.group('unit') or ''
    parsed['quantity'] = res.group('quantity') or ''
    parsed['name'] = (res.group('name') or '').strip()
    #print parsed
    return parsed
def forward_fc(self, inp, weights, reuse=False):
    hidden = normalize(tf.tensordot(inp, weights['w1'], 1) + weights['b1'],
                       activation=tf.nn.relu, reuse=reuse, scope='0',
                       is_training=self.train_phase)
    for i in range(1, len(self.dim_hidden)):
        hidden = normalize(tf.tensordot(hidden, weights['w'+str(i+1)], 1) + weights['b'+str(i+1)],
                           activation=tf.nn.relu, reuse=reuse, scope=str(i+1),
                           is_training=self.train_phase)
    return tf.tensordot(hidden, weights['w'+str(len(self.dim_hidden)+1)], 1) + weights['b'+str(len(self.dim_hidden)+1)]
def plsa(self, number_of_topics, max_iter, lambda_b=0):
    '''
    Topic Modeling
    '''
    print("EM iteration begins...")
    # Get vocabulary and number of documents.
    self.build_vocabulary()
    number_of_documents = len(self.documents)
    vocabulary_size = len(self.vocabulary)

    # build term-doc matrix
    self.term_doc_matrix = np.zeros([number_of_documents, vocabulary_size], dtype=np.int)
    for d_index, doc in enumerate(self.documents):
        term_count = np.zeros(vocabulary_size, dtype=np.int)
        for word in doc:
            if word in self.vocabulary:
                w_index = self.vocabulary.index(word)
                term_count[w_index] += 1
        self.term_doc_matrix[d_index] = term_count

    # Create the counter arrays.
    self.document_topic_prob = np.zeros([number_of_documents, number_of_topics], dtype=np.float)  # P(z | d)
    self.topic_word_prob = np.zeros([number_of_topics, len(self.vocabulary)], dtype=np.float)  # P(w | z)
    self.topic_prob = np.zeros([number_of_documents, len(self.vocabulary), number_of_topics], dtype=np.float)  # P(z | d, w)
    self.background_word_prob = np.sum(self.term_doc_matrix, axis=0) / np.sum(self.term_doc_matrix)  # Background words probability (1 X W)
    self.background_prob = np.zeros([number_of_documents, len(self.vocabulary)], dtype=np.float)  # initialize background probability

    # Initialize
    print("Initializing...")
    # randomly assign values
    self.document_topic_prob = np.random.random(size=(number_of_documents, number_of_topics))
    for d_index in range(len(self.documents)):
        normalize(self.document_topic_prob[d_index])  # normalize for each document
    self.topic_word_prob = np.random.random(size=(number_of_topics, len(self.vocabulary)))
    for z in range(number_of_topics):
        normalize(self.topic_word_prob[z])  # normalize for each topic

    # Run the EM algorithm
    temp = 0
    for iteration in range(max_iter):
        print("Iteration #" + str(iteration + 1) + "...")
        #print("===E step===")
        for d_index, document in enumerate(self.documents):
            for w_index in range(vocabulary_size):
                denominator = ((lambda_b * self.background_word_prob[w_index]) +
                               ((1 - lambda_b) * np.sum(self.document_topic_prob[d_index, :] * self.topic_word_prob[:, w_index])))
                prob = self.document_topic_prob[d_index, :] * self.topic_word_prob[:, w_index]
                if sum(prob) == 0.0:
                    print("d_index = " + str(d_index) + ", w_index = " + str(w_index))
                    print("self.document_topic_prob[d_index, :] = " + str(self.document_topic_prob[d_index, :]))
                    print("self.topic_word_prob[:, w_index] = " + str(self.topic_word_prob[:, w_index]))
                    print("topic_prob[d_index][w_index] = " + str(prob))
                    exit(0)
                else:
                    normalize(prob)
                self.topic_prob[d_index][w_index] = prob
                self.background_prob[d_index][w_index] = (lambda_b * self.background_word_prob[w_index]) / denominator
        #print("===M step===")
        # update P(w | z); word-distribution
        for z in range(number_of_topics):
            for w_index in range(vocabulary_size):
                s = 0
                for d_index in range(len(self.documents)):
                    count = self.term_doc_matrix[d_index][w_index]
                    s = s + count * (1 - self.background_prob[d_index][w_index]) * self.topic_prob[d_index, w_index, z]
                self.topic_word_prob[z][w_index] = s
            normalize(self.topic_word_prob[z])
        # update P(z | d); lamda(coverage)
        for d_index in range(len(self.documents)):
            for z in range(number_of_topics):
                s = 0
                for w_index in range(vocabulary_size):
                    count = self.term_doc_matrix[d_index][w_index]
                    s = s + count * (1 - self.background_prob[d_index][w_index]) * self.topic_prob[d_index, w_index, z]
                self.document_topic_prob[d_index][z] = s
            normalize(self.document_topic_prob[d_index])
        self.para = lambda_b
        if abs(self.loglikelihood() - temp) < 0.0001:
            break
        else:
            temp = self.loglikelihood()
            self.listLoglikelihood.append(temp)
    return self.para, self.listLoglikelihood, self.topic_word_prob, self.document_topic_prob
def generate_pva(feature_name='sk_pva_99', labels_name='labels_raw'):
    print("Extracting the training set of position, velocity and acceleration")
    data = os.path.join("E:\\program\\Chalearn\\rawdata\\train\\")
    # Get the list of training samples
    samples = os.listdir(data)
    target_dir = 'E:\\program\\Chalearn\\Chalearn_LSTM\\target\\'
    output_dir = 'E:\\program\\Chalearn\\Chalearn_LSTM\\feature\\' + feature_name
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    used_joints = ['ElbowLeft', 'WristLeft', 'ShoulderLeft', 'HandLeft',
                   'ElbowRight', 'WristRight', 'ShoulderRight', 'HandRight',
                   'Head', 'Spine', 'HipCenter']
    njoints = len(used_joints)
    # f = open('SK_normalization.pkl','r')
    # normal_params = pickle.load(f)
    # f.close()
    # Mean = normal_params['Mean1']
    # Std = normal_params['Std1']
    count = 0
    # target_category = 21
    Target_all = []
    #Feature_all = numpy.zeros(shape=(400000, (njoints*(njoints-1)/2 + njoints**2)*3), dtype=numpy.float32)
    for file_count, file in enumerate(samples):
        if int(file[-8:-4]) != 417 and int(file[-8:-4]) != 675:
            print("\t Processing file " + file)
            # Create the object to access the sample
            smp = GestureSample(os.path.join(data, file))
            # ###############################################
            # USE Ground Truth information to learn the model
            # ###############################################
            # Get the list of actions for this frame
            gesturesList = smp.getGestures()
            frame_num = smp.getNumFrames()
            Feature_Array = np.zeros(shape=(frame_num, 3 * 3 * njoints), dtype=np.float32)
            # Target = np.zeros(shape=(frame_num, target_category), dtype=np.uint8)

            # feature generate
            Skeleton_matrix, valid_skel = Extract_feature_normalized_ALL(smp, used_joints, 1, frame_num)
            # Feature_Array = Extract_feature_Realtime(Skeleton_matrix, njoints)
            # Skeleton_matrix = Smooth_Skeleton(Skeleton_matrix, window_len=5, smooth_mode='gaussian')
            Feature_Array = Extract_feature_pva(Skeleton_matrix, njoints)
            Mean = np.mean(Feature_Array, axis=0)
            Std = np.std(Feature_Array, axis=0)
            Feature_Array = normalize(Feature_Array, Mean, Std)

            # save sample sk features
            output_name = '%04d.npy' % count
            # output_name = file[-8:-4]+'.npy'
            np.save(os.path.join(output_dir, output_name), Feature_Array)
            count += 1

            # target generate
            labels = np.zeros(frame_num, np.uint8)
            for row in gesturesList:
                labels[int(row[1]) - 1:int(row[2]) - 1] = int(row[0])
            Target_all.append(labels)
            del smp
    np.save(target_dir + '%s.npy' % labels_name, Target_all)
def run():
    if not plugin_conf["enabled"]:
        return
    log.debug("[" + __name__ + "] listening for UDP datagrams on port " + str(plugin_conf['port_listen']))
    # bind to the network
    sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
    sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
    sock.setsockopt(socket.SOL_SOCKET, socket.SO_BROADCAST, 1)
    sock.bind(("", plugin_conf['port_listen']))
    while True:
        try:
            # new data arrives
            data, addr = sock.recvfrom(1024)
            log.debug("[" + __name__ + "] received " + data)
            data = json.loads(data)
            if data["type"] != "WirelessMessage":
                continue
            # check if it belongs to a registered sensor
            if data["id"] not in nodes:
                continue
            node = nodes[data["id"]]
            # for each measure
            for message in data["data"]:
                if message == "STARTED":
                    if default_measure not in node:
                        continue
                    sensor = node[default_measure]
                    log.info("[" + sensor["module_id"] + "][" + sensor["sensor_id"] + "] has just started")
                    # ACK a started message
                    tx(sensor, "ACK", True)
                    # initialize
                    init(sensor)
                if message == "AWAKE":
                    if default_measure not in node:
                        continue
                    sensor = node[default_measure]
                    # send a message if there is something in the queue
                    if data["id"] in queue and len(queue[data["id"]]) > 0:
                        tx(sensor, queue[data["id"]])
                        queue[data["id"]] = []
                    # put it to sleep again
                    sleep(sensor)
                # other messages can be a measure from the sensor
                measures = []
                # for each registered measure for this node_id
                for measure, sensor in node.iteritems():
                    # skip if not a registered measure
                    if not message.startswith(measure):
                        continue
                    measure_data = {}
                    # generate the timestamp
                    date = datetime.datetime.strptime(data["timestamp"], "%d %b %Y %H:%M:%S +0000")
                    measure_data["timestamp"] = utils.timezone(utils.timezone(int(time.mktime(date.timetuple()))))
                    measure_data["key"] = sensor["sensor_id"]
                    # strip out the measure from the value
                    measure_data["value"] = utils.normalize(message.replace(measure, ""),
                                                            conf["constants"]["formats"][sensor["format"]]["formatter"])
                    measures.append(measure_data)
                    sensors.store(sensor, measures)
        except Exception, e:
            log.warning("unable to parse " + str(data) + ": " + utils.get_exception(e))
    return parameters

nll_history = {}
nll_history['teacher'] = list()
nll_history['student'] = list()
rmse_history = {}
rmse_history['teacher'] = list()
rmse_history['student'] = list()

dataset_name = 'kin8nm'

for i in range(20):
    print("\nIteration: ", i)

    # -- Load training and test data --
    (x_train, y_train), (x_test, y_test) = load_data(i, 0.1, dataset_name)
    (x_train, y_train), (x_test, y_test) = normalize(x_train, y_train, x_test, y_test)
    network_shape = get_network_shape(dataset_name)

    #print("x_train.s: ", x_train.shape, ", y_train.shape: ", y_train.shape,
    #      ", x_test.s: ", x_test.shape, ", y_test.shape: ", y_test.shape)
    #print("network_shape: ", network_shape)

    # -- Create and train teacher --
    teacher_parameters = initialize_teacher_parameters(network_shape)
    teacher = t.RegressionTeacherModel(teacher_parameters)
    teacher_history = teacher.train(x_train, y_train, x_test, y_test)

    # -- Create and train student (using all nets of the teacher) --
    M = teacher_parameters['ensemble_nets']
    student_parameters = initialize_student_parameters(teacher_parameters)
    student = s.RegressionStudentModel(teacher, student_parameters)
def plsa(self, nb_of_topics, max_iter):
    """
    EM
    :param nb_of_topics: number of topics
    :param max_iter: max number of iterations
    :return:
    """
    print("EM iteration begins....")
    self.build_vocabulary()
    self.generate_term_doc_matrix()

    # build the counter arrays
    # p(zk|di) shape: (nb_of_documents, nb_of_topics)
    self.doc_topic_prob = np.zeros([self.nb_of_documents, nb_of_topics], dtype=np.float)
    # p(wj|zk) shape: (nb_of_topics, vocab_size)
    self.topic_word_prob = np.zeros([nb_of_topics, self.vocab_size], dtype=np.float)
    # p(zk|di, wj) shape: (nb_of_documents, vocab_size, nb_of_topics)
    self.topic_prob = np.zeros([self.nb_of_documents, self.vocab_size, nb_of_topics], dtype=np.float)

    # Initialize
    print("Initializing ... ")
    # random initialization
    self.doc_topic_prob = np.random.random(size=(self.nb_of_documents, nb_of_topics))
    for d_index in range(self.nb_of_documents):
        # normalize for each document
        normalize(self.doc_topic_prob[d_index])
    self.topic_word_prob = np.random.random(size=(nb_of_topics, self.vocab_size))
    for z in range(nb_of_topics):
        # normalize for each topic
        normalize(self.topic_word_prob[z])

    # run EM algorithm
    # E-Step:
    #   p(zk|di, wj) = (p(wj|zk) * p(zk|di)) / (Σl=1,K p(wj|zl) * p(zl|di))
    # M-Step:
    #   p(wj|zk) = Σi n(di, wj) * p(zk|di, wj) / (Σm=1,M Σi n(di, wj) p(zk|di, wj))
    #   p(zk|di) = Σj n(di, wj) * p(zk|di, wj) / (Σk=1,K Σj n(di, wj) p(zk|di, wj))
    for iter in range(max_iter):
        print("Iteration #" + str(iter + 1) + "...")
        print("E step : ")
        for d_index, document in enumerate(self.documents):
            for w_index in range(self.vocab_size):
                # p(zk|di) * p(wj|zk)
                # shape: (nb_of_topics); prob is an array of length nb_of_topics
                prob = self.doc_topic_prob[d_index, :] * self.topic_word_prob[:, w_index]
                if sum(prob) == 0.0:
                    print("d_index = " + str(d_index) + ", w_index = " + str(w_index))
                    print("self.document_topic_prob[d_index, :] = " + str(self.doc_topic_prob[w_index, :]))
                    print("self.topic_word_prob[:, w_index] = " + str(self.topic_word_prob[:, w_index]))
                    print("topic_prob[d_index][w_index] = " + str(prob))
                    exit(0)
                else:
                    normalize(prob)
                self.topic_prob[d_index][w_index] = prob
        print("M step : ")
        # update p(wj|zk)
        for z in range(nb_of_topics):
            for w_index in range(self.vocab_size):
                numer = 0
                for d_index in range(self.nb_of_documents):
                    # n(di, wj)
                    count = self.term_doc_matrix[d_index][w_index]
                    # Σi n(di, wj) * p(zk|di, wj)
                    numer += count * self.topic_prob[d_index, w_index, z]
                self.topic_word_prob[z][w_index] = numer
        normalize(self.topic_word_prob)
        # update p(zk|di)
        for d_index in range(self.nb_of_documents):
            for z in range(nb_of_topics):
                numer = 0
                for w_index in range(self.vocab_size):
                    # n(di, wj)
                    count = self.term_doc_matrix[d_index][w_index]
                    numer += count * self.topic_prob[d_index, w_index, z]
                self.doc_topic_prob[d_index][z] = numer
        normalize(self.doc_topic_prob)
                             shape=[None, 1, 20],
                             dtype=tf.float32)  # enrollment batch (time x batch x n_mel)

# embedding lstm (3-layer default)
with tf.variable_scope("lstm"):
    lstm_cells = [tf.contrib.rnn.LSTMCell(num_units=config.hidden, num_proj=config.proj)
                  for i in range(config.num_layer)]
    lstm = tf.contrib.rnn.MultiRNNCell(lstm_cells)  # make lstm op and variables
    outputs, _ = tf.nn.dynamic_rnn(cell=lstm, inputs=inputtensor,
                                   dtype=tf.float32, time_major=True)  # for TI-VS must use dynamic rnn
    embedded = outputs[-1]  # the last output is the embedded d-vector
    embedded = normalize(embedded)  # normalize

outputfun = 1 * embedded
saver = tf.train.Saver(var_list=tf.global_variables())

with tf.Session() as sess:
    tf.global_variables_initializer().run()

    # load model
    # print("model path :", config.model_path)
    ckpt = tf.train.get_checkpoint_state(checkpoint_dir=os.path.join(config.model_path, "Check_Point"))
    ckpt_list = ckpt.all_model_checkpoint_paths
    loaded = 0
    for model in ckpt_list:
        if config.model_num == int(
def evaluate(self, eval_batches, result_dir=None, result_prefix=None, save_full_info=False):
    """
    Evaluates the model performance on eval_batches and results are saved if specified
    Args:
        eval_batches: iterable batch data
        result_dir: directory to save predicted answers, answers will not be saved if None
        result_prefix: prefix of the file for saving predicted answers,
                       answers will not be saved if None
        save_full_info: if True, the pred_answers will be added to raw sample and saved
    """
    pred_answers, ref_answers = [], []
    total_loss, total_num = 0, 0
    for b_itx, batch in enumerate(eval_batches):
        feed_dict = {self.p: batch['passage_token_ids'],
                     self.q: batch['question_token_ids'],
                     self.p_length: batch['passage_length'],
                     self.q_length: batch['question_length'],
                     self.start_label: batch['start_id'],
                     self.end_label: batch['end_id'],
                     self.dropout_keep_prob: 1.0}
        start_probs, end_probs, loss = self.sess.run(
            [self.start_probs, self.end_probs, self.loss], feed_dict)

        total_loss += loss * len(batch['raw_data'])
        total_num += len(batch['raw_data'])

        padded_p_len = len(batch['passage_token_ids'][0])
        for sample, start_prob, end_prob in zip(batch['raw_data'], start_probs, end_probs):
            best_answer = self.find_best_answer(sample, start_prob, end_prob, padded_p_len)
            if save_full_info:
                sample['pred_answers'] = [best_answer]
                pred_answers.append(sample)
            else:
                pred_answers.append({'question_id': sample['question_id'],
                                     'question_type': sample['question_type'],
                                     'answers': [best_answer],
                                     'entity_answers': [[]],
                                     'yesno_answers': []})
            if 'answers' in sample:
                ref_answers.append({'question_id': sample['question_id'],
                                    'question_type': sample['question_type'],
                                    'answers': sample['answers'],
                                    'entity_answers': [[]],
                                    'yesno_answers': []})

    if result_dir is not None and result_prefix is not None:
        result_file = os.path.join(result_dir, result_prefix + '.json')
        with open(result_file, 'w') as fout:
            for pred_answer in pred_answers:
                fout.write(json.dumps(pred_answer, ensure_ascii=False) + '\n')
        self.logger.info('Saving {} results to {}'.format(result_prefix, result_file))

    # this average loss is invalid on test set, since we don't have true start_id and end_id
    ave_loss = 1.0 * total_loss / total_num
    # compute the bleu and rouge scores if reference answers is provided
    if len(ref_answers) > 0:
        pred_dict, ref_dict = {}, {}
        for pred, ref in zip(pred_answers, ref_answers):
            question_id = ref['question_id']
            if len(ref['answers']) > 0:
                pred_dict[question_id] = normalize(pred['answers'])
                ref_dict[question_id] = normalize(ref['answers'])
        bleu_rouge = compute_bleu_rouge(pred_dict, ref_dict)
    else:
        bleu_rouge = None
    return ave_loss, bleu_rouge
def fit(self, path_real, path_synth, path_eval=None, batch_size=30, epochs=500,
        save_model_every=10, save_images_every=10, use_roids=False,
        rec_weight=10, dis_weight=10, cycle_weight=10,
        attr_cycle_b3_weight=5, attr_cycle_a_weight=3, save_summary=True):
    '''
    The training function.

    args:
        path_real            : the path where the images from the real domain are stored
        path_synth           : the path where the images from the synthetic domain are stored
        img_size             : the size of the images. It's used to deal with different datasets
        batch_size           : the size of the mini-batch
        epochs               : the number of epochs
        save_model_every     : every how many epochs to create a new checkpoint
        save_images_every    : every how many epochs to save the outputs of the model
        use_roids            : whether or not to use extra conditions, other than the ones of the paper
        rec_weight           : the weight of the reconstruction loss
        dis_weight           : the weight of the disentanglement loss
        cycle_weight         : the weight of the cycle loss
        attr_cycle_b3_weight : the weight of the attribute cycle loss for b3
        attr_cycle_a_weight  : the weight of the attribute cycle loss for a
        save_summary         : whether or not to create a summary report for the architecture and the hyperparameters
    '''
    if save_summary:
        self.log_config(batch_size=batch_size, roids=use_roids, rec_weight=rec_weight,
                        dis_weight=dis_weight, cycle_weight=cycle_weight,
                        attr_cycle_b3_weight=attr_cycle_b3_weight,
                        attr_cycle_a_weight=attr_cycle_a_weight)

    losses = np.empty((0, 8), float)
    cnt = 0
    for epoch in range(epochs):
        start = time()
        print(f'\nEpoch: {epoch} / {epochs}')
        epoch_losses = np.zeros([8])

        # load the datasets
        data_real = utils.get_batch_flow(path_real, self.img_size, batch_size)
        data_synth = utils.get_batch_flow(path_synth, tuple([3 * self.img_size[0], self.img_size[1]]), batch_size)

        n_batches_real = len(data_real) if len(data_real) % batch_size == 0 else len(data_real) - 1
        data_real = list(islice(data_real, n_batches_real))
        data_synth = list(islice(data_synth, n_batches_real))

        for i, (a, b) in enumerate(zip(data_real, data_synth)):
            print(f'\tBatch: {i+1} / {n_batches_real}\r', end='')

            # normalize the input images
            a = utils.normalize(a)
            b = utils.normalize(b)

            # split b to b1, b2 and b3
            b1, b2, b3 = utils.split_to_attributes(b)

            # if cnt == 0:
            #     print("####", np.shape(b1))
            #     for b_ in b1:
            #         b_ = np.array(b_)
            #         g1 = utils.denormalize(b_)
            #         plt.imsave('/content/sample_data/b1.png', g1)
            #     # g1 = utils.denormalize(b1)
            #     # g2 = utils.denormalize(b2)
            #     # g3 = utils.denormalize(b3)
            #     # plt.imsave('/content/sample_data/b1.png', g1)
            #     # plt.imsave('/content/sample_data/b2.png', g2)
            #     # plt.imsave('/content/sample_data/b3.png', g3)
            #     cnt += 1

            batch_losses, generated_images = self.train_step(
                a=a, b1=b1, b2=b2, b3=b3, use_roids=use_roids,
                rec_weight=rec_weight, dis_weight=dis_weight, cycle_weight=cycle_weight,
                attr_cycle_b3_weight=attr_cycle_b3_weight,
                attr_cycle_a_weight=attr_cycle_a_weight)

            batch_losses = [batch_losses['reconstruction'], batch_losses['disentanglement'],
                            batch_losses['cycle'], batch_losses['attribute cycle'],
                            batch_losses['generator real'], batch_losses['generator synth'],
                            batch_losses['discriminator real'], batch_losses['discriminator synth']]
            epoch_losses = np.add(epoch_losses, batch_losses)

        # calculate the losses for the whole epoch
        epoch_losses = epoch_losses / n_batches_real
        losses = np.append(losses, [epoch_losses], axis=0)

        # save only the images from the last batch to save space
        if save_images_every:
            if epoch % save_images_every == 0 or epoch + 1 == epochs:
                if path_eval:
                    self.eval(base_path=path_eval, target_folder=epoch)
                    print(f'\tSaved evaluation rows and gifs for epoch {epoch}!')
                utils.plot_losses(losses)
                utils.save(a, b1, b2, b3, generated_images, i, epoch, remove_existing=False)
                print(f'\tSaved losses and images for epoch {epoch}!')

        if save_model_every:
            if epoch % save_model_every == 0 or epoch + 1 == epochs:
                ckpt_path = self.ckpt_manager.save()
                print(f'\tSaved checkpoint for epoch {epoch} at {ckpt_path}!\n')

        utils.print_losses(epoch_losses)
        print(f'\n\tTime taken for epoch {epoch}: {time()-start}sec.')
isodir = "/home/ondrej/data/isochrones/696/halo"
data = iso.readfits(isodir + "/datarr.fits")
isos = iso.readisos(isodir)
t = utils.frange(8, 10.25, 0.001)
p = parameters()
p.load()
m = fit.metallicity(t, p)
s = fit.sfr(t, p)
w = utils.calculateweights(t, s)
if gauss:
    isow = iso.getisosweights_gauss(w, 10.**t, m, isos, p.sigma)
else:
    isow = iso.getisosweights(w, 10.**t, m, isos)
model = iso.computeCMD(isow, isos)
#model=isos[0][2]*0.0+1.0
model = utils.normalize(model, sum(data.flat))
#model=model/sum(model.flat)

def plot_residuals(d, m):
    import pylab
    pylab.subplot(131)
    pylab.imshow(d, origin='lower', interpolation="nearest")
    pylab.subplot(132)
    pylab.imshow(m, origin='lower', interpolation="nearest")
    pylab.subplot(133)
    #pylab.imshow((d-m)/m,origin='lower',interpolation="nearest")
    pylab.imshow(d - m, origin='lower', interpolation="nearest")
    # pylab.cool()
    pylab.savefig("graph.eps")
def information_content(values):
    """Number of bits to represent the probability distribution in values."""
    probabilities = normalize(removeall(0, values))
    return sum(-p * math.log2(p) for p in probabilities)
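# A quick sanity check (not from the original source), assuming normalize scales the
# values to sum to 1 and removeall drops the zero entries, as in the surrounding utilities.
assert abs(information_content([3, 3, 3, 3]) - 2.0) < 1e-9  # uniform over 4 outcomes = 2 bits
assert abs(information_content([5, 5, 0]) - 1.0) < 1e-9     # zeros are dropped; a fair coin = 1 bit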
def forward_fc(self, inp, weights, reuse=False):
    hidden = normalize(tf.matmul(inp, weights['w1']) + weights['b1'],
                       activation=tf.nn.relu, reuse=reuse, scope='0')
    for i in range(1, len(self.dim_hidden)):
        hidden = normalize(tf.matmul(hidden, weights['w'+str(i+1)]) + weights['b'+str(i+1)],
                           activation=tf.nn.relu, reuse=reuse, scope=str(i+1))
    return tf.matmul(hidden, weights['w'+str(len(self.dim_hidden)+1)]) + weights['b'+str(len(self.dim_hidden)+1)]
        # compute the average loss
        avg_loss_G.update(errG.item(), batch_size)
        avg_loss_D.update(errD.item(), batch_size)
        avg_loss_A.update(accuracy, batch_size)
        avg_loss_M.update(mi.item(), batch_size)

        print('[%d/%d][%d/%d] Loss_D: %.4f (%.4f) Loss_G: %.4f (%.4f) D(x): %.4f D(G(z)): %.4f / %.4f Acc: %.4f (%.4f) MI: %.4f (%.4f)'
              % (epoch, opt.niter, i, len(dataloader),
                 errD.item(), avg_loss_D.avg, errG.item(), avg_loss_G.avg,
                 D_x, D_G_z1, D_G_z2, accuracy, avg_loss_A.avg, mi.item(), avg_loss_M.avg))
        if i % 100 == 0:
            vutils.save_image(utils.normalize(real_cpu), '%s/real_samples.png' % opt.outf)
            # print('Label for eval = {}'.format(eval_label))
            fake = netG(eval_noise, eval_label)
            vutils.save_image(utils.normalize(fake.data),
                              '%s/fake_samples_epoch_%03d.png' % (opt.outf, epoch))
            # update eval_label
            if opt.visualize_class_label >= 0 and opt.label_rotation:
                eval_label_const = (eval_label_const + 1) % num_classes
                eval_label.data.fill_(eval_label_const)

    # compute metrics
    is_mean, is_std, fid = get_metrics(sampler, num_inception_images=opt.num_inception_images,
                                       num_splits=10, prints=True, use_torch=False)
def load_CodaLab_skel(ratio_train=0.9, ration_valid=0.1):
    print '... loading data'
    f = file('Feature_train_realtime.pkl', 'rb')
    Feature_train = cPickle.load(f)
    f.close()
    f = file('Feature_all_neutral_realtime.pkl', 'rb')
    Feature_train_neural = cPickle.load(f)
    f.close()

    # Because we have too many neutral frames, we only need part of them
    rand_num = numpy.random.permutation(Feature_train_neural['Feature_all_neutral'].shape[0])
    F_neural = Feature_train_neural['Feature_all_neutral'][rand_num]
    T_neural = Feature_train_neural['Targets_all_new'][rand_num]

    Feature_all = numpy.concatenate((Feature_train['Feature_all'], F_neural))
    Target_all = numpy.concatenate((Feature_train['Targets_all'], T_neural))

    rand_num = numpy.random.permutation(Feature_all.shape[0])
    Feature_all = Feature_all[rand_num]
    Target_all = Target_all[rand_num]
    Target_all_numeric = numpy.argmax(Target_all, axis=1)

    # train_set, valid_set, test_set format: tuple(input, target)
    # input is a numpy.ndarray of 2 dimensions (a matrix) in which each row
    # corresponds to an example. target is a numpy.ndarray of 1 dimension
    # (a vector) that has the same length as the number of rows in the input.
    # It should give the target to the example with the same index in the input.
    # We separate the dataset into training: 80%, validation: 10%, testing: 10%
    train_end = int(rand_num.shape[0]*ratio_train)
    valid_end = int(rand_num.shape[0]*(ratio_train+ration_valid))

    # Wudi made it a small set:
    train_set_feature = Feature_all[0:train_end, :]
    train_set_new_target = Target_all_numeric[0:train_end]

    # Wudi added normalized data for GRBM
    [train_set_feature_normalized, Mean1, Std1] = preprocessing.scale(train_set_feature)
    import cPickle as pickle
    f = open('SK_normalization.pkl', 'wb')
    pickle.dump({"Mean1": Mean1, "Std1": Std1}, f)
    f.close()

    train_set_x, train_set_y = shared_dataset((train_set_feature_normalized, train_set_new_target))

    valid_set_feature = Feature_all[train_end:valid_end, :]
    valid_set_new_target = Target_all_numeric[train_end:valid_end]
    valid_set_feature = normalize(valid_set_feature, Mean1, Std1)
    valid_set_x, valid_set_y = shared_dataset((valid_set_feature, valid_set_new_target))

    # test feature set
    test_set_feature = Feature_all[valid_end:, :]
    test_set_new_target = Target_all_numeric[valid_end:]
    test_set_feature = normalize(test_set_feature, Mean1, Std1)
    test_set_x, test_set_y = shared_dataset((test_set_feature, test_set_new_target))

    rval = [(train_set_x, train_set_y), (valid_set_x, valid_set_y), (test_set_x, test_set_y)]
    return rval
def _init_params(self):
    # Multinomial parameter beta: KxV
    self._beta = np.random.gamma(100, 1. / 100, (self._K, self._V))
    self._beta = normalize(self._beta, axis=1)
np.random.seed(args.seed)
torch.manual_seed(args.seed)
if args.cuda:
    torch.cuda.manual_seed(args.seed)

if args.dataset == "polblogs":
    tmp_adj, features, labels, idx_train, idx_test = load_polblogs_data()
else:
    _, features, labels, idx_train, idx_val, idx_test, tmp_adj = load_data(args.dataset)

num_classes = labels.max().item() + 1

# tmp_adj = tmp_adj.toarray()
adj = tmp_adj
adj = np.eye(tmp_adj.shape[0]) + adj
adj, _ = normalize(adj)
adj = torch.from_numpy(adj.astype(np.float32))

# print (sum(features))
# print (labels.shape)
# print (idx_train.shape)
# print (idx_val.shape)
# print (idx_test)
# s = adj.shape[0]

# Model and optimizer
model = GCN(nfeat=features.shape[1],
            nhid=args.hidden,
            nclass=num_classes,
            dropout=args.dropout)
optimizer = optim.Adam(model.parameters(),
                       lr=args.lr,
            if tube_id == -1:
                pass
                #pdb.set_trace()
            if not (tube_id in ids_filter):
                continue
            material = objects[j]['material']
            shape = objects[j]['shape']
            attr = encode_attr(material, shape, bbox_size, args.attr_dim)
            if args.box_only_flag:
                xyhw_norm = (xyhw_exp - 0.5) / 0.5
                s = [attr, torch.cat([xyhw_norm], 0).unsqueeze(0), tube_id]
            elif args.new_mode == 1:
                xyhw_norm = (xyhw_exp - 0.5) / 0.5
                img_crop = normalize(crop(img, crop_box_v2, H, W), 0.5, 0.5).permute(2, 0, 1)
                s = [attr, torch.cat([xyhw_norm, img_crop], 0).unsqueeze(0), tube_id]
            else:
                img_crop = normalize(crop(img, crop_box_v2, H, W), 0.5, 0.5).permute(2, 0, 1)
                s = [attr, torch.cat([xyhw_exp, img_crop], 0).unsqueeze(0), tube_id]
            frame_objs.append(s)
import librosa
import numpy as np
import soundfile

from utils import normalize

fullpath = '../audios/F001_001.wav'
sampling_rate = 22050

y, sr = librosa.core.load(fullpath, sampling_rate)
print(y.shape)
print('np.mean(y): ' + str(np.mean(y)))
print('np.max(y): ' + str(np.max(y)))
print('np.min(y): ' + str(np.min(y)))

soundfile.write('test.wav', y, sampling_rate)
y, sr = librosa.load('test.wav')

# Trim the beginning and ending silence
yt, index = librosa.effects.trim(y, top_db=20)
yt = normalize(yt)
print(index)

# Print the durations
print(librosa.get_duration(y), librosa.get_duration(yt))
print(len(y), len(yt))
soundfile.write('test2.wav', yt, sampling_rate)
def edge_decision(type, alphas, selected_idxs, candidate_flags, probs_history, epoch, model, args):
    mat = F.softmax(torch.stack(alphas, dim=0), dim=-1).detach()
    print(mat)
    importance = torch.sum(mat[:, 1:], dim=-1)
    # logging.info(type + " importance {}".format(importance))

    probs = mat[:, 1:] / importance[:, None]
    # print(type + " probs", probs)
    entropy = cate.Categorical(probs=probs).entropy() / math.log(probs.size()[1])
    # logging.info(type + " entropy {}".format(entropy))

    if args.use_history:  # SGAS Cri.2
        # logging.info(type + " probs history {}".format(probs_history))
        histogram_inter = histogram_average(probs_history, probs)
        # logging.info(type + " histogram intersection average {}".format(histogram_inter))
        probs_history.append(probs)
        if len(probs_history) > args.history_size:
            probs_history.pop(0)
        score = utils.normalize(importance) * utils.normalize(1 - entropy) * utils.normalize(histogram_inter)
        # logging.info(type + " score {}".format(score))
    else:  # SGAS Cri.1
        score = utils.normalize(importance) * utils.normalize(1 - entropy)
        # logging.info(type + " score {}".format(score))

    if torch.sum(candidate_flags.int()) > 0 and \
            epoch >= args.warmup_dec_epoch and \
            (epoch - args.warmup_dec_epoch) % args.decision_freq == 0:
        masked_score = torch.min(score, (2 * candidate_flags.float() - 1) * np.inf)
        # cut strategy
        # selected_edge_idx = torch.argmax(masked_score)
        # selected_edge_idx = random.randint(0, 13)
        if type == 'normal':
            reduction = False
            selected_edge_idx = random.choice(select_normal)
            select_normal.remove(selected_edge_idx)
            logging.info('select_list: {}, choice:{}'.format(select_normal, selected_edge_idx))
        elif type == 'reduce':
            reduction = True
            selected_edge_idx = random.choice(select_reduce)
            select_reduce.remove(selected_edge_idx)
            logging.info('select_list: {}, choice:{}'.format(select_reduce, selected_edge_idx))
        else:
            raise Exception('Unknown Cell Type')
        selected_op_idx = torch.argmax(probs[selected_edge_idx]) + 1  # add 1 since none op
        selected_idxs[selected_edge_idx] = selected_op_idx

        candidate_flags[selected_edge_idx] = False
        alphas[selected_edge_idx].requires_grad = False
        candidate_flags, selected_idxs = model.check_edges(candidate_flags, selected_idxs, reduction=reduction)
        logging.info("#" * 30 + " Decision Epoch " + "#" * 30)
        logging.info("epoch {}, {}_selected_idxs {}, added edge {} with op idx {}".format(
            epoch, type, selected_idxs, selected_edge_idx, selected_op_idx))
        print(type + "_candidate_flags {}".format(candidate_flags))
        score_image(type, score, epoch)
        return True, selected_idxs, candidate_flags
    else:
        logging.info("#" * 30 + " Not a Decision Epoch " + "#" * 30)
        logging.info("epoch {}, {}_selected_idxs {}".format(epoch, type, selected_idxs))
        print(type + "_candidate_flags {}".format(candidate_flags))
        score_image(type, score, epoch)
        return False, selected_idxs, candidate_flags
def test_firstmodel(generator, opt, dataloader, writer, scale):
    content_criterion = nn.MSELoss()
    ones_const = Variable(torch.ones(1, 1))

    if opt.cuda:
        generator.cuda()
        content_criterion.cuda()

    curr_time = time.time()
    for epoch in range(opt.nEpochs):
        mean_generator_content_loss = 0.0
        mean_generator_total_loss = 0.0
        high_res_fake = 0
        for batch_no, data in enumerate(dataloader['test']):
            high_img, _ = data
            generator.train(False)
            input1 = high_img[0, :, :, :]
            input2 = high_img[1, :, :, :]
            input3 = high_img[2, :, :, :]
            input4 = high_img[3, :, :, :]
            # imshow(input3)
            for j in range(opt.batchSize):
                high_img[j] = normalize(high_img[j])
            high_comb = torch.cat([high_img[0], high_img[1], high_img[2], high_img[3]], 0)
            high_comb = Variable(high_comb[np.newaxis, :]).cuda()
            # imshow(high_comb.cpu().data)
            input_comb = torch.cat([scale(input1), scale(input2), scale(input3), scale(input4)], 0)
            input_comb = input_comb[np.newaxis, :]
            if opt.cuda:
                high_res_real = Variable(high_img.cuda())
                high_res_fake = generator(Variable(input_comb).cuda())
                outputs = torch.chunk(high_res_fake, 4, 1)
                outputs = torch.cat([outputs[0], outputs[1], outputs[2], outputs[3]], 0)
                # imshow(outputs[0])
                generator_content_loss = content_criterion(high_res_fake, high_comb)
                mean_generator_content_loss += generator_content_loss.data[0]
                generator_total_loss = generator_content_loss
                mean_generator_total_loss += generator_total_loss.data[0]

                if (batch_no % 10 == 0):
                    # print("phase {} batch no. {} generator_content_loss {} discriminator_loss {}".format(phase, batch_no, generator_content_loss, discriminator_loss))
                    sys.stdout.write('\r epoch [%d/%d] batch no. [%d/%d] Generator_content_Loss: %.4f ' %
                                     (epoch, opt.nEpochs, batch_no, len(dataloader['test']), generator_content_loss))
        mssim = avg_msssim(high_res_real, outputs)
        psnr_val = psnr(un_normalize(high_res_real), un_normalize(outputs))

        writer.add_scalar("test per epoch/PSNR", psnr_val, epoch + 1)
        # writer.add_scalar(phase+" per epoch/discriminator loss", mean_discriminator_loss/len(dataloader[phase]), epoch+1)
        writer.add_scalar("test per epoch/generator loss", mean_generator_total_loss / len(dataloader[phase]), epoch + 1)
        writer.add_scalar("per epoch/total time taken", time.time() - curr_time, epoch + 1)
        writer.add_scalar("test per epoch/avg_mssim", mssim, epoch + 1)
    torch.save(generator.state_dict(), '%s/generator_firstfinal.pth' % opt.out)
def generate_eigenjoint(feature_name='sk_eigenjoint_nor_528', labels_name='labels_raw'):
    # Data folder (Training data)
    print("Extracting the training files")
    data = os.path.join("E:\\program\\Chalearn\\rawdata\\train\\")
    target_dir = 'E:\\program\\Chalearn\\Chalearn_LSTM\\target\\'
    # Get the list of training samples
    samples = os.listdir(data)
    output_dir = 'E:\\program\\Chalearn\\Chalearn_LSTM\\feature\\' + feature_name
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    used_joints = ['ElbowLeft', 'WristLeft', 'ShoulderLeft', 'HandLeft',
                   'ElbowRight', 'WristRight', 'ShoulderRight', 'HandRight',
                   'Head', 'Spine', 'HipCenter']
    njoints = len(used_joints)
    f = open('SK_normalization.pkl', 'r')
    normal_params = pickle.load(f)
    f.close()
    Mean = normal_params['Mean1']
    Std = normal_params['Std1']
    count = 0
    # target_category = 21
    Target_all = []
    #Feature_all = numpy.zeros(shape=(400000, (njoints*(njoints-1)/2 + njoints**2)*3), dtype=numpy.float32)
    for file_count, file in enumerate(samples):
        if int(file[-8:-4]) != 417 and int(file[-8:-4]) != 675:
            print("\t Processing file " + file)
            # Create the object to access the sample
            smp = GestureSample(os.path.join(data, file))
            # ###############################################
            # USE Ground Truth information to learn the model
            # ###############################################
            # Get the list of actions for this frame
            gesturesList = smp.getGestures()
            frame_num = smp.getNumFrames()
            Feature_Array = np.zeros(shape=(frame_num, (njoints * (njoints - 1) / 2 + njoints**2) * 3),
                                     dtype=np.float32)
            # Target = np.zeros(shape=(frame_num, target_category), dtype=np.uint8)

            # feature generate
            Skeleton_matrix, valid_skel = Extract_feature_UNnormalized(smp, used_joints, 1, frame_num)
            Feature_Array = Extract_feature_Realtime(Skeleton_matrix, njoints)
            Feature_Array = normalize(Feature_Array, Mean, Std)
            add_ = Feature_Array[-1].reshape((1, Feature_Array.shape[1]))
            Feature_Array = np.concatenate((Feature_Array, add_), axis=0)

            # save sample sk features
            output_name = '%04d.npy' % count
            count += 1
            np.save(os.path.join(output_dir, output_name), Feature_Array)

            # target generate
            labels = np.zeros(frame_num, np.uint8)
            for row in gesturesList:
                labels[int(row[1]) - 1:int(row[2]) - 1] = int(row[0])
            Target_all.append(labels)
            del smp
    np.save(target_dir + '%s.npy' % labels_name, Target_all)
def test_single(generator, discriminator, opt, dataloader, scale):
    generator.load_state_dict(torch.load(opt.generatorWeights))
    discriminator.load_state_dict(torch.load(opt.discriminatorWeights))

    content_criterion = nn.MSELoss()
    adversarial_criterion = nn.BCELoss()
    ones_const = Variable(torch.ones(1, 1))

    if opt.cuda:
        generator.cuda()
        discriminator.cuda()
        content_criterion.cuda()
        adversarial_criterion.cuda()
        ones_const = ones_const.cuda()

    curr_time = time.time()
    # for epoch in range(opt.nEpochs):
    mean_generator_content_loss = 0.0
    mean_generator_adversarial_loss = 0.0
    mean_generator_total_loss = 0.0
    mean_discriminator_loss = 0.0
    high_res_fake = 0
    for batch_no, data in enumerate(dataloader['test']):
        high_img, _ = data
        generator.train(False)
        discriminator.train(False)
        print("batch no. {} shape of input {}".format(batch_no, high_img.shape))
        input1 = high_img[0, :, :, :]
        input2 = high_img[1, :, :, :]
        input3 = high_img[2, :, :, :]
        input4 = high_img[3, :, :, :]
        inputs = torch.FloatTensor(opt.batchSize, 3, opt.imageSize, opt.imageSize)
        # imshow(input3)
        for j in range(opt.batchSize):
            inputs[j] = scale(high_img[j])
            high_img[j] = normalize(high_img[j])
        high_comb = torch.cat([high_img[0], high_img[1], high_img[2], high_img[3]], 0)
        high_comb = Variable(high_comb[np.newaxis, :]).cuda()
        # imshow(high_comb.cpu().data)
        input_comb = torch.cat([scale(input1), scale(input2), scale(input3), scale(input4)], 0)
        # inputs = [scale(input1), scale(input2), scale(input3), scale(input4)]
        input_comb = input_comb[np.newaxis, :]
        if opt.cuda:
            # optimizer.zero_grad()
            high_res_real = Variable(high_img.cuda())
            high_res_fake = generator(Variable(input_comb).cuda())
            target_real = Variable(torch.rand(1, 1) * 0.5 + 0.7).cuda()
            target_fake = Variable(torch.rand(1, 1) * 0.3).cuda()
            outputs = torch.chunk(high_res_fake, 4, 1)
            outputs = torch.cat([outputs[0], outputs[1], outputs[2], outputs[3]], 0)
            # imshow(outputs[0])
            generator_content_loss = content_criterion(high_res_fake, high_comb)
            mean_generator_content_loss += generator_content_loss.data[0]
            generator_adversarial_loss = adversarial_criterion(discriminator(high_res_fake), ones_const)
            mean_generator_adversarial_loss += generator_adversarial_loss.data[0]
            generator_total_loss = generator_content_loss + 1e-3 * generator_adversarial_loss
            mean_generator_total_loss += generator_total_loss.data[0]

            discriminator_loss = adversarial_criterion(discriminator(high_comb), target_real) + \
                adversarial_criterion(discriminator(Variable(high_res_fake.data)), target_fake)
            mean_discriminator_loss += discriminator_loss.data[0]

            psnr_val = psnr(un_normalize(outputs), un_normalize(high_res_real))
            print(psnr_val)
            imsave(outputs.cpu().data, train=False, epoch=batch_no, image_type='fake')
            imsave(high_img, train=False, epoch=batch_no, image_type='real')
            imsave(inputs, train=False, epoch=batch_no, image_type='low')
def _analysis(self, step, train_batches, dropout_keep_prob):
    """
    Runs an analysis pass over the training data for a single epoch.
    Args:
        step: global step counter, incremented once per question batch
        train_batches: iterable batch data for training
        dropout_keep_prob: float value indicating dropout keep probability
    """
    total_loss = 0
    num_loss = 0
    total_recall = [0.0, 0.0, 0.0, 0.0]
    num_recall = 0
    batch_start_time = time.time()
    pred_answers, ref_answers = [], []
    fake_answers = []
    for fbitx, batch in enumerate(train_batches, 1):
        step += 1
        if fbitx % 1000 == 0:
            print('------ Batch Question: ' + str(fbitx))
        trees = []
        batch_tree_set = []
        batch_size = len(batch['question_ids'])
        # print('batch_size', batch_size)
        for bitx in range(batch_size):
            tree = {'question_id': batch['question_ids'][bitx],
                    'question_token_ids': batch['question_token_ids'][bitx],
                    'q_length': batch['question_length'][bitx],
                    'passage_token_ids_list': batch['passage_token_ids_list'][bitx],
                    'passage_title_token_ids_list': batch['passage_title_token_ids_list'][bitx],
                    'passage_title_length_list': batch['passage_title_length_list'][bitx],
                    'passage_sentence_token_ids_list': batch['passage_sentence_token_ids_list'][bitx],
                    'passage_sen_length': batch['passage_sen_length_list'][bitx],
                    # 'p_length': batch['passage_length'][bitx],
                    'passage_is_selected_list': batch['passage_is_selected_list'][bitx],
                    'question_type': batch['question_types'][bitx],
                    'ref_answers': batch['ref_answers'][bitx],
                    'fake_answers': batch['fake_answers'][bitx],
                    'segmented_answers': batch['segmented_answers'][bitx]}
            ref_answers.append({'question_id': tree['question_id'],
                                'question_type': tree['question_type'],
                                'answers': tree['ref_answers']})
            trees.append(tree)
            # print(batch)
            batch_tree = SearchTree(self.tfg, tree, self.max_a_len, self.search_time,
                                    self.beta, self.m_value, dropout_keep_prob)
            batch_tree_set.append(batch_tree)

        # Run the analysis for every tree built from this batch
        for idx, batch_tree in enumerate(batch_tree_set, 1):
            pred_answer, fake_answer, recall = batch_tree.train_analysis(step)
            pred_answers.append(pred_answer)
            fake_answers.append(fake_answer)
            total_recall[0] += recall[0]
            total_recall[1] += recall[1]
            total_recall[2] += recall[2]
            total_recall[3] += recall[3]
            num_recall += 1

        print('ave select recall', total_recall[0] / num_recall)
        print('ave select f1', total_recall[1] / num_recall)
        print('ave all recall', total_recall[2] / num_recall)
        print('ave all f1', total_recall[3] / num_recall)

    ii = 0
    if len(ref_answers) > 0:
        pred_dict, ref_dict = {}, {}
        for pred, ref in zip(pred_answers, ref_answers):
            ii += 1
            question_id = ref['question_id']
            # print('type', question_id)
            if len(ref['answers']) > 0:
                ref_dict[question_id] = normalize(ref['answers'])
                pred_dict[question_id] = normalize(pred['answers'])
        bleu_rouge = compute_bleu_rouge(pred_dict, ref_dict)
    else:
        bleu_rouge = None
    value_with_mcts = bleu_rouge
    print('pred_score', value_with_mcts)
    # return 1.0 * total_loss / num_loss, step
    return 0, step
def build_adj_mat(self):
    adj = self.build_adj_original()
    adj = normalize(adj + sp.eye(adj.shape[0]))
    adj = sparse_mx_to_torch_sparse_tensor(adj)
    return adj
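# The normalize() and sparse_mx_to_torch_sparse_tensor() helpers used above are
# not shown in this file; the following is a sketch in the style of common GCN
# preprocessing (row-normalize A + I, then convert scipy sparse to torch sparse).
# Function names with the _sketch suffix are assumptions, not the project's API.
import numpy as np
import scipy.sparse as sp
import torch

def normalize_sketch(mx):
    # D^-1 * mx, guarding against zero-degree rows.
    rowsum = np.asarray(mx.sum(1)).flatten()
    r_inv = np.divide(1.0, rowsum, out=np.zeros_like(rowsum, dtype=np.float64),
                      where=rowsum != 0)
    return sp.diags(r_inv).dot(mx)

def sparse_mx_to_torch_sparse_tensor_sketch(mx):
    # COO indices/values -> torch sparse tensor of the same shape.
    mx = mx.tocoo().astype(np.float32)
    indices = torch.from_numpy(np.vstack((mx.row, mx.col)).astype(np.int64))
    values = torch.from_numpy(mx.data)
    return torch.sparse_coo_tensor(indices, values, mx.shape)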
def svm(training_file, development_file, test_file, counts):
    twords, tlabels_true = hs.load_file(training_file)
    dwords, dlabels_true = hs.load_file(development_file)
    test_words = utils.load_test(test_file)

    ## Length
    tlength_feature = hs.length_feature(twords)
    tlength_normalized, tl_mean, tl_std = utils.normalize(tlength_feature)
    dlength_feature = hs.length_feature(dwords)
    dlength_normalized = utils.normalize_with_params(dlength_feature, tl_mean, tl_std)

    ## Frequency
    tfrequency_feature = hs.frequency_feature(twords, counts)
    tfrequency_normalized, tf_mean, tf_std = utils.normalize(tfrequency_feature)
    dfrequency_feature = hs.frequency_feature(dwords, counts)
    dfrequency_normalized = utils.normalize_with_params(dfrequency_feature, tf_mean, tf_std)

    ## Syllables
    tsyllables_feature = features.syllables_feature(twords)
    tsyllables_normalized, tsy_mean, tsy_std = utils.normalize(tsyllables_feature)
    dsyllables_feature = features.syllables_feature(dwords)
    dsyllables_normalized = utils.normalize_with_params(dsyllables_feature, tsy_mean, tsy_std)

    ## Vowels
    tvowels_feature = features.vowels_feature(twords)
    tvowels_normalized, tv_mean, tv_std = utils.normalize(tvowels_feature)
    dvowels_feature = features.vowels_feature(dwords)
    dvowels_normalized = utils.normalize_with_params(dvowels_feature, tv_mean, tv_std)

    ## Consonants
    tconsonant_feature = features.consonants_feature(twords)
    tconsonant_normalized, tc_mean, tc_std = utils.normalize(tconsonant_feature)
    dconsonant_feature = features.consonants_feature(dwords)
    dconsonant_normalized = utils.normalize_with_params(dconsonant_feature, tc_mean, tc_std)

    ## Senses
    tsenses_feature = features.senses_feature(twords)
    tsenses_normalized, tse_mean, tse_std = utils.normalize(tsenses_feature)
    dsenses_feature = features.senses_feature(dwords)
    dsenses_normalized = utils.normalize_with_params(dsenses_feature, tse_mean, tse_std)

    ## Hypernyms
    thypernyms_feature = features.hypernyms_feature(twords)
    thypernyms_normalized, th_mean, th_std = utils.normalize(thypernyms_feature)
    dhypernyms_feature = features.hypernyms_feature(dwords)
    dhypernyms_normalized = utils.normalize_with_params(dhypernyms_feature, th_mean, th_std)

    x_train = np.column_stack((tlength_normalized, tfrequency_normalized,
                               tsyllables_normalized, tsenses_normalized))
    y = tlabels_true
    x_dev = np.column_stack((dlength_normalized, dfrequency_normalized,
                             dsyllables_normalized, dsenses_normalized))

    clf = SVC(C=48, cache_size=200, class_weight=None, coef0=0.0,
              decision_function_shape='ovr', degree=3, gamma='scale', kernel='rbf',
              max_iter=-1, probability=False, random_state=None, shrinking=True,
              tol=0.001, verbose=False)
    clf.fit(x_train, y)

    y_pred = clf.predict(x_dev)
    daccuracy = hs.get_accuracy(y_pred, dlabels_true)
    dprecision = hs.get_precision(y_pred, dlabels_true)
    drecall = hs.get_recall(y_pred, dlabels_true)
    dfscore = hs.get_fscore(y_pred, dlabels_true)

    # Test Set
    # test_length_feature = hs.length_feature(test_words)
    # test_frequency_feature = hs.frequency_feature(test_words, counts)
    # test_syllables_feature = features.syllables_feature(test_words)
    # test_senses_feature = features.senses_feature(test_words)
    #
    # test_length_normalized = utils.normalize_with_params(test_length_feature, tl_mean, tl_std)
    # test_frequency_normalized = utils.normalize_with_params(test_frequency_feature, tf_mean, tf_std)
    # test_syllables_normalized = utils.normalize_with_params(test_syllables_feature, tsy_mean, tsy_std)
    # test_senses_normalized = utils.normalize_with_params(test_senses_feature, tse_mean, tse_std)
    #
    # x_test = np.column_stack((test_length_normalized, test_frequency_normalized,
    #                           test_syllables_normalized, test_senses_normalized))
    # y_pred_test = clf.predict(x_test)
    #
    # f = open('test_labels.txt', 'w')
    # for item in y_pred_test:
    #     print(item, file=f)
    # f.close()

    # training_performance = (tprecision, trecall, tfscore)
    development_performance = (daccuracy, dprecision, drecall, dfscore)
    return development_performance
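# Hypothetical sketch (not the project's utils module) of the utils.normalize /
# utils.normalize_with_params pair used by svm() and random_forest(): z-score a
# training feature, then reuse its mean/std on the development and test splits.
import numpy as np

def normalize_feature_sketch(feature):
    feature = np.asarray(feature, dtype=float)
    mean, std = feature.mean(), feature.std()
    return (feature - mean) / std, mean, std

def normalize_with_params_sketch(feature, mean, std):
    return (np.asarray(feature, dtype=float) - mean) / std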
def create_non_intersecting():
    pr = np.array([DEFAULT_POSITION[0], DEFAULT_POSITION[1], 0])
    nr = utils.normalize(np.array([0, LARGE_DISTANCE, 1]))
    return Ray(pr, nr)
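# Assumed behaviour of utils.normalize in the ray construction above: scale a
# direction vector to unit length (the _sketch name marks it as an assumption).
import numpy as np

def normalize_vector_sketch(v):
    v = np.asarray(v, dtype=float)
    norm = np.linalg.norm(v)
    return v / norm if norm > 0 else v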
def random_forest(training_file, development_file, test_file, counts):
    twords, tlabels_true = hs.load_file(training_file)
    dwords, dlabels_true = hs.load_file(development_file)
    test_words = utils.load_test(test_file)

    ## Length
    tlength_feature = hs.length_feature(twords)
    tlength_normalized, tl_mean, tl_std = utils.normalize(tlength_feature)
    dlength_feature = hs.length_feature(dwords)
    dlength_normalized = utils.normalize_with_params(dlength_feature, tl_mean, tl_std)

    ## Frequency
    tfrequency_feature = hs.frequency_feature(twords, counts)
    tfrequency_normalized, tf_mean, tf_std = utils.normalize(tfrequency_feature)
    dfrequency_feature = hs.frequency_feature(dwords, counts)
    dfrequency_normalized = utils.normalize_with_params(dfrequency_feature, tf_mean, tf_std)

    ## Syllables
    tsyllables_feature = features.syllables_feature(twords)
    tsyllables_normalized, tsy_mean, tsy_std = utils.normalize(tsyllables_feature)
    dsyllables_feature = features.syllables_feature(dwords)
    dsyllables_normalized = utils.normalize_with_params(dsyllables_feature, tsy_mean, tsy_std)

    ## Vowels
    tvowels_feature = features.vowels_feature(twords)
    tvowels_normalized, tv_mean, tv_std = utils.normalize(tvowels_feature)
    dvowels_feature = features.vowels_feature(dwords)
    dvowels_normalized = utils.normalize_with_params(dvowels_feature, tv_mean, tv_std)

    ## Consonants
    tconsonant_feature = features.consonants_feature(twords)
    tconsonant_normalized, tc_mean, tc_std = utils.normalize(tconsonant_feature)
    dconsonant_feature = features.consonants_feature(dwords)
    dconsonant_normalized = utils.normalize_with_params(dconsonant_feature, tc_mean, tc_std)

    ## Senses
    tsenses_feature = features.senses_feature(twords)
    tsenses_normalized, tse_mean, tse_std = utils.normalize(tsenses_feature)
    dsenses_feature = features.senses_feature(dwords)
    dsenses_normalized = utils.normalize_with_params(dsenses_feature, tse_mean, tse_std)

    ## Hypernyms
    thypernyms_feature = features.hypernyms_feature(twords)
    thypernyms_normalized, th_mean, th_std = utils.normalize(thypernyms_feature)
    dhypernyms_feature = features.hypernyms_feature(dwords)
    dhypernyms_normalized = utils.normalize_with_params(dhypernyms_feature, th_mean, th_std)

    x_train = np.column_stack(
        (tlength_normalized, tfrequency_normalized, tsyllables_normalized,
         tsenses_normalized, thypernyms_normalized))
    y = tlabels_true
    x_dev = np.column_stack(
        (dlength_normalized, dfrequency_normalized, dsyllables_normalized,
         dsenses_normalized, dhypernyms_normalized))

    clf = RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
                                 max_depth=7, max_features=3, max_leaf_nodes=None,
                                 min_impurity_decrease=0.0, min_impurity_split=None,
                                 min_samples_leaf=8, min_samples_split=50,
                                 min_weight_fraction_leaf=0.0, n_estimators=70,
                                 n_jobs=None, oob_score=False, random_state=0,
                                 verbose=0, warm_start=False)
    clf.fit(x_train, y)

    y_pred = clf.predict(x_dev)
    daccuracy = hs.get_accuracy(y_pred, dlabels_true)
    dprecision = hs.get_precision(y_pred, dlabels_true)
    drecall = hs.get_recall(y_pred, dlabels_true)
    dfscore = hs.get_fscore(y_pred, dlabels_true)

    # Test Set
    test_length_feature = hs.length_feature(test_words)
    test_frequency_feature = hs.frequency_feature(test_words, counts)
    test_syllables_feature = features.syllables_feature(test_words)
    test_vowels_feature = features.vowels_feature(test_words)
    test_consonants_feature = features.consonants_feature(test_words)
    test_senses_feature = features.senses_feature(test_words)
    test_hypernyms_feature = features.hypernyms_feature(test_words)

    test_length_normalized = utils.normalize_with_params(test_length_feature, tl_mean, tl_std)
    test_frequency_normalized = utils.normalize_with_params(test_frequency_feature, tf_mean, tf_std)
    test_syllables_normalized = utils.normalize_with_params(test_syllables_feature, tsy_mean, tsy_std)
    test_vowels_normalized = utils.normalize_with_params(test_vowels_feature, tv_mean, tv_std)
    test_consonants_normalized = utils.normalize_with_params(test_consonants_feature, tc_mean, tc_std)
    test_senses_normalized = utils.normalize_with_params(test_senses_feature, tse_mean, tse_std)
    test_hypernyms_normalized = utils.normalize_with_params(test_hypernyms_feature, th_mean, th_std)

    x_test = np.column_stack(
        (test_length_normalized, test_frequency_normalized, test_syllables_normalized,
         test_senses_normalized, test_hypernyms_normalized))
    y_pred_test = clf.predict(x_test)

    f = open('test_labels.txt', 'w')
    for item in y_pred_test:
        print(item, file=f)
    f.close()

    # training_performance = (tprecision, trecall, tfscore)
    development_performance = (daccuracy, dprecision, drecall, dfscore)
    return development_performance
def universal_attack(attack_epoch, max_epoch):
    model.eval()
    delta = 0.1
    fooling_rate = 0.0
    overshoot = 0.02
    # max_iter_df = 10
    max_iter_df = 30
    v = np.zeros(tmp_adj.shape[0]).astype(np.float32)
    # stdv = 1. / math.sqrt(tmp_adj.shape[0])
    # v = np.random.uniform(-stdv, stdv, tmp_adj.shape[0])
    cur_foolingrate = 0.0
    epoch = 0
    early_stop = 0
    results = []
    folder_path = op.join("./", "perturbation_results")
    if not op.exists(folder_path):
        os.mkdir(folder_path)

    while fooling_rate < 1 - delta and epoch < max_epoch:
        epoch += 1
        train_idx = idx_train.cpu().numpy()
        np.random.shuffle(train_idx)

        ###############################################
        print('deepfooling...')
        attack_time = time.time()
        for k in train_idx:
            print('deepfool node', k)
            # Add v to the adjacency entries of node k and see if the attack succeeds
            innormal_x_p = add_perturb(tmp_adj, k, v)
            ################## whether to use filtering
            # innormal_x_p = np.where(innormal_x_p < 0.5, 0, 1)
            x_p, degree_p = normalize(innormal_x_p + np.eye(tmp_adj.shape[0]))  # A' = A + I
            x_p = torch.from_numpy(x_p.astype(np.float32))
            x_p = x_p.cuda()
            output = model(features, x_p)
            # print('output', output[k])
            if int(torch.argmax(output[k])) == int(torch.argmax(ori_output[k])):
                dr, iter = deepfool(innormal_x_p, x_p, k, num_classes,
                                    degree_p[k], overshoot, max_iter_df)
                # print('dr', dr)
                # print('iter', iter)
                # print('the old perturbation matrix was', v)
                # print('the old distance of perturbation is', np.linalg.norm(v.flatten(1)))
                if iter < max_iter_df - 1:
                    v = v + dr
                    # Project on the l_p ball
                    v = proj_lp(v)
                    # print('L1 norm of v', torch.norm(v, p=1))
                    # print('L2 norm of v', torch.norm(v, p=2))
                else:
                    print('cant attack this node')
            else:
                print('attack succeed')
                # print('the prediction of k node', int(torch.argmax(output[k])))
                # print('the true label', int(labels[k]))
        print('the deepfooling time cost is', time.time() - attack_time)

        ###################################################
        # v = np.random.rand(tmp_adj.shape[0])
        print('the perturbation matrix is', v)
        print('testing the attack success rate')
        res = []
        #################
        v = np.where(v > 0.5, 1, 0)
        ##################
        for k in train_idx:
            print('test node', k)
            innormal_x_p = add_perturb(tmp_adj, k, v)
            ############
            # innormal_x_p = np.where(innormal_x_p < 0.5, 0, 1)
            ############
            x_p, degree_p = normalize(innormal_x_p + np.eye(tmp_adj.shape[0]))
            x_p = torch.from_numpy(x_p.astype(np.float32))
            x_p = x_p.cuda()
            output = model(features, x_p)
            if int(torch.argmax(output[k])) == int(torch.argmax(ori_output[k])):
                res.append(0)
            else:
                res.append(1)
        fooling_rate = float(sum(res) / len(res))
        print('the current fooling rate is', fooling_rate)

        ####################
        # if fooling_rate > cur_foolingrate:
        #####################
        if fooling_rate >= cur_foolingrate:
            cur_foolingrate = fooling_rate
            file_path = op.join(
                folder_path,
                '{1}_xi{2}_epoch100/perturbation_{1}_{0}.txt'.format(
                    attack_epoch, args.dataset, args.radius))
            # Make sure the nested output directory exists before writing
            if not op.exists(op.dirname(file_path)):
                os.makedirs(op.dirname(file_path))
            with open(file_path, "w") as f:
                for i in v:
                    f.write(str(i) + '\n')

        #################
        results.append(fooling_rate)
        if epoch > 3:
            if fooling_rate == results[-2]:
                early_stop += 1
            else:
                early_stop = 0
            if early_stop == 15:
                break
        #####################
    return cur_foolingrate
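# Not from the original attack code: a sketch of a normalize() variant that
# matches the call signature used above, returning both the row-normalized
# matrix D^-1 (A + I) and the per-node degrees (name and details are assumptions).
import numpy as np

def normalize_with_degree_sketch(adj):
    degree = adj.sum(axis=1)
    d_inv = np.divide(1.0, degree, out=np.zeros_like(degree, dtype=float),
                      where=degree > 0)
    return (adj * d_inv[:, None]).astype(np.float32), degree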
def sobel(frame):
    frame = GRAY(frame)
    gx = cv2.Sobel(frame, cv2.CV_32F, 1, 0)
    gy = cv2.Sobel(frame, cv2.CV_32F, 0, 1)
    mag, ang = cv2.cartToPolar(gx, gy)
    return normalize(mag)
def compute_fgbg_masks(saliency, bg_variation):
    saliency_prob = normalize(6 * bg_variation + 4 * saliency)
    _, fgmask = cv2.threshold(saliency_prob, 0.6, 1, cv2.THRESH_BINARY)
    _, bgmask = cv2.threshold(bg_variation, 0.1, 1, cv2.THRESH_BINARY_INV)
    return fgmask, bgmask
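# Hedged sketch of the normalize() used by sobel() and compute_fgbg_masks():
# assumed to be min-max scaling of a float image into [0, 1] so the fixed
# thresholds above (0.6 and 0.1) are meaningful.
import numpy as np

def normalize_image_sketch(img):
    img = img.astype(np.float32)
    lo, hi = float(img.min()), float(img.max())
    return (img - lo) / (hi - lo) if hi > lo else np.zeros_like(img)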