def generate_batches():
    if shuffle:
        random.shuffle(target_list)
    for video_dir in target_list:
        # load boxes.json
        # with open(os.path.join(video_dir, 'boxes.json')) as f:
        #     boxes = json.loads(f.read())
        # load landmarks.json
        with open(os.path.join(video_dir, 'landmarks.json')) as f:
            landmarks = json.loads(f.read())
        img_paths = sorted(glob.glob(os.path.join(video_dir, '*.jpg')))
        reference = cv2.imread(img_paths[0], cv2.IMREAD_COLOR)
        reference = cv2.cvtColor(reference, cv2.COLOR_BGR2RGB)
        reference = cv2.resize(reference, (self.width, self.resize_height))
        reference = common.normalize(reference)
        img_paths = img_paths[1:]
        if shuffle:
            random.shuffle(img_paths)
        for img_path in img_paths:
            img = cv2.imread(img_path, cv2.IMREAD_COLOR)
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            img = cv2.resize(img, (self.width, self.resize_height))
            # Normalization
            img = common.normalize(img)
            basename = os.path.basename(img_path)
            landmark = np.array(landmarks[basename]).astype(np.float32)
            landmark_img = common.landmarks_to_img(landmark, img.shape)
            yield (reference, landmark_img), img
def __init__(self, canvas, xpos, width, height, second_height):
    self.canvas = canvas
    self.xpos = xpos
    self.width = normalize(width)
    self.height = normalize(height)
    self.second_height = normalize(second_height)
    self.display_width = self.width
    self.display_height = self.height
    self.second_display_height = self.second_height
    self.color = canvas.parent.get_glass_color()
    self.id = canvas.create_polygon(
        [
            self.xpos,
            GLASS_BASELINE - self.display_height,
            self.xpos,
            GLASS_BASELINE,
            self.xpos + self.display_width,
            GLASS_BASELINE,
            self.xpos + self.display_width,
            GLASS_BASELINE - self.second_display_height
        ],
        fill=self.color)
    self.label = canvas.create_text(
        self.xpos + (self.display_width / 2),
        GLASS_BASELINE - self.display_height - 30,
        text=f"{self.width}x{self.height}x{self.second_height}")
    canvas.tag_bind(
        self.id, "<Button-1>",
        lambda *args: EditGlassPopup(
            canvas, self.id, self.width, self.height, self.second_height))
def keras_load_data_split(trX, trYi, tsX, clean=True):
    # Image Cleaning via Erosion
    if clean:
        kernel = np.ones((2, 1))
        trX = np.array([cv2.erode(img, kernel) for img in trX]).reshape(trX.shape)
        tsX = np.array([cv2.erode(img, kernel) for img in tsX]).reshape(tsX.shape)

    # zero mean, unit variance
    trX = normalize(trX)
    tsX = normalize(tsX)

    # Split datasets
    x_test = tsX
    x_train, x_val, y_train, y_val = train_test_split(trX, trYi, test_size=0.3)

    # Reshape
    input_shape = (28, 28, 1)
    x_train = x_train.reshape(x_train.shape[0], *input_shape)
    x_val = x_val.reshape(x_val.shape[0], *input_shape)
    x_test = x_test.reshape(x_test.shape[0], *input_shape)
    print('x_train shape:', x_train.shape)
    print(x_train.shape[0], 'training samples')
    print(x_val.shape[0], 'validation samples')
    print(x_test.shape[0], 'test samples')

    # convert class vectors to binary class matrices
    y_train = keras.utils.to_categorical(y_train, 20)
    y_val = keras.utils.to_categorical(y_val, 20)

    return x_train, y_train, x_val, y_val, x_test
def kiritchenko(a_N, a_crp_files, a_pos, a_neg, a_neut,
                a_pos_re=NONMATCH_RE, a_neg_re=NONMATCH_RE):
    """Method for generating sentiment lexicons using Kiritchenko's approach.

    @param a_N - number of terms to extract
    @param a_crp_files - files of the original corpus
    @param a_pos - initial set of positive terms to be expanded
    @param a_neg - initial set of negative terms to be expanded
    @param a_neut - initial set of neutral terms to be expanded
    @param a_pos_re - regular expression for matching positive terms
    @param a_neg_re - regular expression for matching negative terms

    @return list of terms sorted according to their polarity scores

    """
    a_pos = set(normalize(w) for w in a_pos)
    a_neg = set(normalize(w) for w in a_neg)
    a_neut = set(normalize(w) for w in a_neut)

    stat = defaultdict(lambda: [0, 0, 0])
    n_pos, n_neg, n_neut = _read_files(stat, a_crp_files, a_pos, a_neg, a_neut,
                                       a_pos_re, a_neg_re)
    ret = _stat2scores(stat, n_pos, n_neg, n_neut, a_pos, a_neg, a_neut)
    ret.sort(key=lambda el: abs(el[-1]), reverse=True)
    if a_N >= 0:
        del ret[a_N:]
    return ret
def specular(v, v_n, view_pt, light_pt, coef=10):
    v_n /= normalize(v_n)
    view_pt = view_pt[0][:3]
    v_i = v - light_pt
    v_i /= normalize(v_i)
    v_reflected = -v_i - 2 * -v_i.dot(v_n) * v_n
    v_viewer = Vector(view_pt - v).normalized().dir
    intensity_specular = max(v_reflected.dot(v_viewer), 0.)
    intensity_specular = pow(intensity_specular, coef)
    return intensity_specular
def rename_user(self, user, new_nick):
    self.lock.acquire()
    try:
        self.users[normalize(new_nick)] = user
        try:
            del self.users[normalize(user.nick)]
        except KeyError:
            pass
        user.nick = new_nick
    finally:
        self.lock.release()
def setView(self, eye, target, up):
    zaxis = common.normalize(eye - target)
    xaxis = common.normalize(np.cross(up, zaxis))
    yaxis = np.cross(zaxis, xaxis)
    xlist = xaxis.tolist()
    ylist = yaxis.tolist()
    zlist = zaxis.tolist()
    xlist.append(-np.dot(xaxis, eye))
    ylist.append(-np.dot(yaxis, eye))
    zlist.append(-np.dot(zaxis, eye))
    orientation = np.array([xlist, ylist, zlist, [0, 0, 0, 1]])
    return np.transpose(orientation)
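# For reference, the same right-handed look-at construction as a standalone
# sketch (illustrative only: `look_at` and `_normalize` are stand-in names, not
# part of the original class, and `_normalize` assumes common.normalize simply
# rescales a vector to unit length).
import numpy as np

def _normalize(v):
    return v / np.linalg.norm(v)

def look_at(eye, target, up):
    zaxis = _normalize(eye - target)           # camera looks down -z
    xaxis = _normalize(np.cross(up, zaxis))
    yaxis = np.cross(zaxis, xaxis)
    # Rotation rows plus the projected eye offsets, transposed into the same
    # column-major 4x4 view matrix that setView above returns.
    view = np.array([
        [xaxis[0], xaxis[1], xaxis[2], -np.dot(xaxis, eye)],
        [yaxis[0], yaxis[1], yaxis[2], -np.dot(yaxis, eye)],
        [zaxis[0], zaxis[1], zaxis[2], -np.dot(zaxis, eye)],
        [0.0, 0.0, 0.0, 1.0],
    ])
    return view.T

# Example: camera at (0, 0, 5) looking at the origin with +Y up.
view = look_at(np.array([0.0, 0.0, 5.0]), np.zeros(3), np.array([0.0, 1.0, 0.0]))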
def __init__(self, canvas, xpos, height, is_last):
    self.canvas = canvas
    self.xpos = xpos
    self.display_width = POST_DISPLAY_WIDTH
    self.height = normalize(height)
    self.display_height = self.height + 5
    self.is_last = is_last
    self.color = "black"
    self.id = canvas.create_rectangle(
        self.xpos,
        CANVAS_BASELINE - self.display_height,
        self.xpos + self.display_width,
        CANVAS_BASELINE,
        fill=self.color)
    self.base = canvas.create_rectangle(
        self.xpos - POST_BASE_DISPLAY_WIDTH,
        CANVAS_BASELINE - POST_BASE_DISPLAY_HEIGHT,
        self.xpos + self.display_width + POST_BASE_DISPLAY_WIDTH,
        CANVAS_BASELINE,
        fill=self.color)
    canvas.tag_bind(
        self.id, "<Button-1>",
        lambda *args: EditPostOrWallmountPopup(canvas, self.id, self.height))
def main():
    frames_path = os.path.join(common.DATASET_ROOT, '701_StillsRaw_full/')
    labels_path = os.path.join(common.DATASET_ROOT, 'LabeledApproved_full/')
    fnames = glob.glob(os.path.join(frames_path, '*.png'))
    lnames = [
        os.path.join(labels_path, os.path.basename(fn)[:-4] + '_L.png')
        for fn in fnames
    ]
    imgs = np.stack([common.open_image(fn) for fn in fnames])
    labels = np.stack([common.open_image(fn) for fn in lnames])

    # Normalize and standardize
    imgs = common.normalize(imgs)
    imgs = common.standardize(imgs)
    common.save_array(os.path.join(common.DATASET_ROOT, 'results/imgs.bc'), imgs)
    common.save_array(os.path.join(common.DATASET_ROOT, 'results/labels.bc'), labels)

    # Convert labels
    label_codes, label_names = parse_label_colors(
        os.path.join(common.DATASET_ROOT, 'label_colors.txt'))
    code2id = {v: k for k, v in enumerate(label_codes)}
    failed_code = len(label_codes) + 1
    label_codes.append((0, 0, 0))
    label_names.append('unk')
    labels_int = conv_all_labels(labels, code2id, failed_code,
                                 imgs.shape[0], imgs.shape[1], imgs.shape[2])
    labels_int[labels_int == failed_code] = 0
    common.save_array(
        os.path.join(common.DATASET_ROOT, 'results/labels_int.bc'), labels_int)
def policy_loop(state_, t, total_cost, total_trans_err, _):
    mu = self.policy.forward(state_, autoencoder)

    if self.env.continuous_actions:
        eta = self.env.sigma * tf.random_normal(shape=tf.shape(mu), mean=self.noise_mean)
        a = mu + eta
    else:
        a = common.gumbel_softmax_sample(logits=mu, temperature=self.temp)

    # minimize the gap between agent logit (d[:, 0]) and expert logit (d[:, 1])
    d = self.discriminator.forward(state_, a, autoencoder)
    cost = self.al_loss(d)

    # add step cost
    total_cost += tf.mul(tf.pow(self.gamma, t), cost)

    # get next state
    if self.env.continuous_actions:
        a_sim = common.denormalize(a, self.er_expert.actions_mean, self.er_expert.actions_std)
    else:
        a_sim = tf.argmax(a, dimension=1)

    state_env, _, env_term_sig = self.env.step(a_sim, mode='tensorflow')[:3]
    state_e = common.normalize(state_env, self.er_expert.states_mean, self.er_expert.states_std)
    state_e = tf.stop_gradient(state_e)

    state_a, _, _ = self.forward_model.forward([state_, a, s])

    state, nu = common.re_parametrization(state_e=state_e, state_a=state_a)
    total_trans_err += tf.reduce_mean(abs(nu))
    t += 1

    return state, t, total_cost, total_trans_err, env_term_sig
def genFeatureAfterNormalize(fin, fout, desc, mins, maxs):
    head = desc['features'][0]['name']
    for f in desc['features'][1:]:
        head = head + ',%s' % f['name']
    head = head + ',class'
    fout.write('%s\n' % head)

    features = desc['features']
    cntFeatures = len(features)
    while True:
        l = fin.readline().strip()
        if len(l) == 0:
            break
        feature = [i.strip() for i in l.split(',')]
        l = ''
        tmp = None
        for i in range(cntFeatures):
            features[i]['isNormalize'] = tmp if features[i].get(
                'isNormalize') is None else features[i]['isNormalize']
            feature[i] = eval(feature[i])
            if features[i]['isNormalize']:
                feature[i] = normalize(feature[i], maxs[i], mins[i])
            l = repr(feature[i]) if i == 0 else l + ',%s' % repr(feature[i])
            tmp = features[i]['isNormalize']
        l = l + ',%s' % feature[-1]
        fout.write('%s\n' % l)
def __init__(self, prob):
    """Load the MNIST dataset."""
    print("Loading MNIST dataset...")
    mndata = MNIST('./data/mnist/')
    mnist_train_images, mnist_train_labels = mndata.load_training()
    mnist_train_images = np.asarray(mnist_train_images)
    mnist_train_images = normalize(mnist_train_images)
    mnist_train_labels = np.asarray(mnist_train_labels)

    # divide dataset by label
    print("Dividing dataset...")
    sorted_train_images = []
    sorted_train_labels = []
    for label in range(0, 10):
        train_index = np.where(mnist_train_labels == label)
        sorted_train_images.append(mnist_train_images[train_index[0]])
        sorted_train_labels.append(np.asarray([label] * len(train_index[0])))

    # add salt-and-pepper noise
    print("Adding salt and pepper noise...")
    shape = 28 * 28  # image shape of MNIST
    self.train_images = []
    self.train_labels = sorted_train_labels
    for images in sorted_train_images:
        noise_images = []
        for image in images:
            noise_image = salt_and_pepper(image, prob, shape)
            noise_images.append(noise_image)
        self.train_images.append(noise_images)
def convert2tensor(self, dataset, batch_size, limit):
    b_data = dataset['X']
    b_data = b_data[:limit]
    print("normalizing images...")
    b_data = common.normalize(b_data)
    print("done")

    target = dataset['y']
    target = target.reshape((len(target)))
    target = target[:limit]
    # SVHN labels run from 1 to 10: shift to 0 to 9 to fit the network's outputs
    target = target - 1

    data = []
    for i in range(len(target)):
        data.append(b_data[:, :, :, i])
    data = np.asarray(data)

    tensor_data = torch.from_numpy(data)
    tensor_data = tensor_data.float()
    tensor_target = torch.from_numpy(target)
    loader = data_utils.TensorDataset(tensor_data, tensor_target)
    loader_dataset = data_utils.DataLoader(loader, batch_size=batch_size, shuffle=True)
    return loader_dataset
def aggregate():
    engine = get_db()
    table = engine.get_table('tokens')
    table.delete()
    bulk = ChunkedInsert(table, chunksize=10000)
    rex = re.compile(r'\w+')
    for names in iter_names(engine):
        parts = set()
        for name in names:
            for token in rex.findall(name):
                token = token.lower()
                if len(token) > 3:
                    norm = normalize(token)
                    if len(norm):
                        parts.add((token, norm))
        pairs = set()
        for pair in combinations(parts, 2):
            pairs.add(tuple(sorted(pair)))
        for ((a, an), (b, bn)) in pairs:
            if an == bn:
                continue
            max_dist = max(len(an), len(bn)) * 0.6
            dist = distance(an, bn)
            if dist <= max_dist:
                # print(a, b, max_dist, dist, dist > max_dist)
                bulk.insert({
                    'a': a,
                    'an': an,
                    'b': b,
                    'bn': bn,
                })
    bulk.flush()
def heatMap(df, table, variable, unit):
    df.dropna(subset=[variable], inplace=True)
    df.reset_index(drop=True, inplace=True)
    normalized = com.normalize(df[variable])
    data = list(zip(df.lat, df.lon, normalized))
    m = folium.Map([df.lat.mean(), df.lon.mean()],
                   tiles=None,
                   zoom_start=3,
                   control_scale=True,
                   prefer_canvas=True)
    m.get_root().title = 'Map: ' + variable + unit
    m = addLayers(m)
    HeatMap(data, name='Data Density (%s)' % variable).add_to(m)
    m = addMarkers(m, df, variable, unit)
    m = addMousePosition(m)
    folium.LayerControl(collapsed=True).add_to(m)
    # m = addFullScreen(m)

    dirPath = 'embed/'
    if not os.path.exists(dirPath):
        os.makedirs(dirPath)
    fname = dirPath + 'heatMap.html'
    if os.path.exists(fname):
        os.remove(fname)
    m.save(fname)
    com.openHTML(fname)
    return
def unregister_user(self, user):
    self.lock.acquire()
    try:
        nnick = normalize(user.nick)
        if self.users.get(nnick) == user:
            del self.users[nnick]
    finally:
        self.lock.release()
def forward(self, x, lengths):
    x = self.embedding(x)
    packed = pack_padded_sequence(x, lengths, batch_first=True, enforce_sorted=False)
    _, out = self.gru(packed)
    out = out.squeeze(0)
    out = normalize(out)
    return out
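# A minimal, hypothetical module around the forward pass above; the vocabulary
# size, embedding/hidden dimensions, and the use of F.normalize for the final
# L2 normalization are illustrative assumptions, not taken from the original.
import torch
import torch.nn as nn
from torch.nn.functional import normalize
from torch.nn.utils.rnn import pack_padded_sequence

class SentenceEncoder(nn.Module):
    def __init__(self, vocab_size=10000, embed_dim=128, hidden_dim=256):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embed_dim, padding_idx=0)
        self.gru = nn.GRU(embed_dim, hidden_dim, batch_first=True)

    def forward(self, x, lengths):
        x = self.embedding(x)
        packed = pack_padded_sequence(x, lengths, batch_first=True,
                                      enforce_sorted=False)
        _, out = self.gru(packed)   # final hidden state: (1, batch, hidden_dim)
        out = out.squeeze(0)        # -> (batch, hidden_dim)
        return normalize(out)       # unit-length sequence embeddings

# Example: a padded batch of two sequences with true lengths 5 and 3.
encoder = SentenceEncoder()
ids = torch.randint(1, 10000, (2, 5))
embeddings = encoder(ids, torch.tensor([5, 3]))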
def handle_status(status, rules, offsets):
    for rule, data in rules.items():
        for field in ['status_text', 'user_name', 'user_screen_name']:
            if offsets.get(data.get('regex')) > status.get('status_id'):
                continue
            m = rule.search(normalize(status.get(field)))
            # print [field, data.get('regex'), m]
            if m is not None:
                # print [field, data.get('regex'), m]
                data['status_id'] = status['status_id']
                tag_table.insert(data)
def buildNormalsFromFaces(self):
    for vertex in self.vertexBuffer:
        npVertex = np.array(vertex, dtype=np.float32)
        n = np.zeros(3, dtype=np.float32)
        i = 0
        for face in self.faces:
            if face.containsVertex(npVertex):
                n += face.normal
                i += 1
        n = common.normalize(n / i)
        self.normalBuffer.append(n)
def get_channel(self, jid):
    channel_name = jid.node
    try:
        channel_name = node_to_channel(channel_name, self.default_encoding)
    except ValueError:
        self.__logger.debug("Bad channel name: %r" % (channel_name,))
        return None
    if not channel_re.match(channel_name):
        self.__logger.debug("Bad channel name: %r" % (channel_name,))
        return None
    return self.channels.get(normalize(channel_name))
def irc_cmd_JOIN(self, prefix, command, params):
    nprefix = normalize(prefix)
    nnick = normalize(self.session.nick)
    if nprefix == nnick or nprefix.startswith(nnick + "!"):
        if self.state == "join":
            self.__logger.debug("Channel %r joined!" % (self.name,))
            self.session.user.sync_delay += 1
            try:
                self.session.user.join_channel(self)
            finally:
                self.session.user.sync_delay -= 1
            self.state = "joined"
            self.requests.get("JOIN")
            self.session.send("MODE %s" % (self.name,))
            self.session.send("WHO %s" % (self.name,))
    else:
        user = self.session.get_user(prefix)
        user.join_channel(self)
        self.send_notice_message(
            u"%s has joined" % (unicode(user.nick, self.encoding, "replace"),))
        self.session.send("WHO %s" % (user.nick,))
def channel_left(self, channel):
    try:
        del self.channels[normalize(channel.name)]
    except KeyError:
        pass
    if not channel.room_jid:
        return
    if channel.room_jid not in self.used_for:
        return
    try:
        self.used_for.remove(channel.room_jid)
    except:
        pass
    if not self.used_for:
        self.disconnect("Quit")
def join(self, stanza):
    to = stanza.get_to()
    if to.node == '#':
        return self.join_raw_channel(stanza)
    self.cond.acquire()
    try:
        if not self.ready:
            self.join_requests.append(stanza.copy())
            return
    finally:
        self.cond.release()
    try:
        channel = node_to_channel(to.node, self.default_encoding)
    except ValueError:
        e = stanza.make_error_response("not-acceptable")
        self.component.send(e)
        return
    if self.channels.has_key(normalize(channel)):
        return
    if to not in self.used_for:
        self.used_for.append(to)
    channel = Channel(self, channel)
    channel.join(stanza)
    self.channels[normalize(channel.name)] = channel
def keras_load_data(trX, trYi, tsX, clean=True):
    # Image Cleaning via Erosion
    if clean:
        kernel = np.ones((2, 1))
        trX = np.array([cv2.erode(img, kernel) for img in trX]).reshape(trX.shape)
        tsX = np.array([cv2.erode(img, kernel) for img in tsX]).reshape(tsX.shape)

    # zero mean, unit variance
    trX = normalize(trX)
    tsX = normalize(tsX)

    # Reshape
    input_shape = (28, 28, 1)
    trX = trX.reshape(trX.shape[0], *input_shape)
    tsX = tsX.reshape(tsX.shape[0], *input_shape)

    trYc = keras.utils.to_categorical(trYi, 20)

    print(trX.shape[0], 'training samples')
    print(tsX.shape[0], 'test samples')

    return trX, trYc, tsX
def get_user(self, prefix, create=1):
    if "!" in prefix:
        nick = prefix.split("!", 1)[0]
    else:
        nick = prefix
    if not self.network.valid_nick(nick, 0):
        return None
    nnick = normalize(nick)
    if self.users.has_key(nnick):
        return self.users[nnick]
    if not create:
        return None
    user = IRCUser(self, prefix)
    self.register_user(user)
    return user
def shading_func(u):
    pt = np.array(u)
    dists = [normalize(v[:2] - pt) for v in vertices]
    l_shading = 0.
    weights = get_barycentric_coords(pt, vertices)
    for weight, shade in zip(weights, shadings):
        l_shading += weight / sum(weights) * shade
    if normals:
        z = sum(w * v[2] for w, v in zip(weights, vertices))
        v_n = np.zeros(3)
        for weight, normal in zip(weights, normals):
            v_n += weight * normal
        s_specular = specular(np.array(list(u) + [z]), v_n,
                              kwargs['view_pt'], kwargs['light_pt'])
        l_shading += .5 * np.clip(s_specular, 0, 1)
    return np.clip(l_shading, 0, 1)
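# `get_barycentric_coords` is assumed above; one conventional implementation
# for a 2D point inside a triangle uses signed areas (illustrative sketch, not
# necessarily how the original helper is written).
import numpy as np

def get_barycentric_coords(pt, vertices):
    # Only the x, y components of each vertex are used; extra components
    # (such as z) are ignored.
    a, b, c = (np.asarray(v[:2], dtype=float) for v in vertices)
    p = np.asarray(pt, dtype=float)

    def signed_area(p0, p1, p2):
        return 0.5 * ((p1[0] - p0[0]) * (p2[1] - p0[1])
                      - (p2[0] - p0[0]) * (p1[1] - p0[1]))

    total = signed_area(a, b, c)
    return np.array([signed_area(p, b, c) / total,
                     signed_area(a, p, c) / total,
                     signed_area(a, b, p) / total])

# Sanity check: the centroid of a triangle gets equal weights (~1/3 each).
tri = [np.array([0.0, 0.0]), np.array([1.0, 0.0]), np.array([0.0, 1.0])]
print(get_barycentric_coords(np.mean(tri, axis=0), tri))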
def main():
    trainset = common.load_data(TRAINSET_PATH, sep=',')
    trainset = common.onehot_encode(trainset, 0)
    for i in range(N_MODEL):
        x_train, x_test, y_train, y_test = common.split(trainset, i)
        x_train, x_test = common.normalize(x_train, x_test)
        model, history = train(x_train, y_train, N_EPOCH)
        model.evaluate(x_test, y_test)
        model.save(common.numbering(MODEL_PATH, i))
        save_history(history, common.numbering(HISTORY_PATH, i))
        print(i, ' is done.')
def search(query):
    global filters
    query = common.normalize(query)
    filters.title = query  # to do filtering by name
    query += ' ' + settings.extra
    if settings.time_noti > 0:
        provider.notify(message="Searching: " + query.title() + '...',
                        header=None, time=settings.time_noti, image=settings.icon)
    query = provider.quote_plus(query)
    url_search = "%s/newtemp/include/ajax/ajax.search.php?search=%s" % (
        settings.url, query.replace(' ', '%20'))  # change in each provider
    provider.log.info(url_search)
    if browser.open(url_search):
        results = extract_torrents(browser.content)
    else:
        provider.log.error('>>>>>>>%s<<<<<<<' % browser.status)
        provider.notify(message=browser.status, header=None, time=5000,
                        image=settings.icon)
        results = []
    return results
def addMarkers(m, df, variable, unit):
    normalized = com.normalize(df[variable])
    mc = MarkerCluster(name=variable + unit,
                       options={
                           'spiderfyOnMaxZoom': 'False',
                           'disableClusteringAtZoom': '4'
                       })
    for i in range(len(df)):
        folium.CircleMarker(location=[df.lat[i], df.lon[i]],
                            radius=(normalized[i] * 10),
                            tooltip='%s: %f%s <br> date: %s' %
                                    (variable, df[variable][i], unit, df['time'][i]),
                            color=colors['darkOrange'],
                            fill=True).add_to(mc)
    mc.add_to(m)
    return m
def sync_user(self, user, status=None):
    if user.channels.has_key(normalize(self.name)):
        if user not in self.users:
            self.users.append(user)
    else:
        for m in self.multiarg_modes:
            ul = self.modes.get(m, [])
            if user in ul:
                ul.remove(user)
        if user in self.users:
            self.users.remove(user)
            self.send_notice_message(
                u"%s has quit" % (unicode(user.nick, self.encoding, "replace"),))
    if self.state:
        p = self.get_user_presence(user, status=status)
        self.session.component.send(p)
def __init__(
        self,
        # maximum fundamental and harmonic frequency, normalized
        max_f=0.5,
        # number of table points, 4096 is a good tradeoff between accuracy and
        # space when using linear interpolation to look up
        N=4096,
        # the duty cycle
        D=0.5,
        # if true, scale output to lie between -1 and 1
        normalize=False,
        dtype='float32'):
    # tabs_per_oct has to be 1, or we can figure out a different way to do
    # it where the number of periods per table is integral valued
    tabs_per_oct = 1
    min_f = 1 / N
    n_tabs = int(np.floor(np.log2(max_f / min_f) / tabs_per_oct))
    self.tabs = np.zeros((n_tabs, N), dtype=dtype)
    self.tabs_fs = np.zeros(n_tabs)
    w = 2 * np.pi * np.arange(N) / N
    for n_t in range(n_tabs):
        # centre around 0
        self.tabs[n_t, :] += D - 0.5
        k = 1
        print(n_t)
        while k * 2**((n_t + 1) / tabs_per_oct) * min_f < max_f:
            a_k = np.sin(2 * np.pi * k * D) / (k * np.pi)
            b_k = 2 * np.sin(np.pi * k * D)**2 / (k * np.pi)
            if a_k < 1e-8:
                a_k = 0
            if b_k < 1e-8:
                b_k = 0
            print(a_k, b_k)
            self.tabs[n_t, :] += a_k * np.cos(k * w) + b_k * np.sin(k * w)
            k += 1
        print()
        if normalize:
            self.tabs[n_t, :] = common.normalize(self.tabs[n_t, :],
                                                 subtract_mean=False)
        self.tabs_fs[n_t] = 2**(n_t / tabs_per_oct) * min_f
    self.min_f = min_f
    self.max_f = min_f * 2**((n_tabs - 1) / tabs_per_oct)
    self.N = N
    self.tabs_per_oct = tabs_per_oct
    self.n_tabs = n_tabs
    self.D = D
    self.last_pos = 0
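# The a_k / b_k terms in the loop above are the real Fourier-series
# coefficients of a pulse wave with duty cycle D (value 1 for the first
# fraction D of each period, 0 otherwise); the D - 0.5 offset centres the
# stored table around zero. Illustrative numerical check, not part of the
# original class:
import numpy as np

N, D, K = 4096, 0.25, 200
w = 2 * np.pi * np.arange(N) / N
wave = np.full(N, D - 0.5)
for k in range(1, K + 1):
    a_k = np.sin(2 * np.pi * k * D) / (k * np.pi)
    b_k = 2 * np.sin(np.pi * k * D) ** 2 / (k * np.pi)
    wave += a_k * np.cos(k * w) + b_k * np.sin(k * w)

# Away from the discontinuities the truncated series sits near +0.5 inside the
# pulse and near -0.5 outside it.
print(wave[N // 8], wave[N // 2])   # ~0.5, ~-0.5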
def voice_user(self, nick, stanza):
    nick = nick.encode(self.encoding, "strict")
    user = self.session.users.get(normalize(nick))
    if user not in self.users:
        r = stanza.make_error_response("item-not-found")
        self.session.component.send(r)
        return
    if user in self.modes.get("v", []):
        r = stanza.make_result_response()
        self.session.component.send(r)
        return
    if user in self.modes.get("o", []):
        change = "-o+v %s %s" % (nick, nick)
    else:
        change = "+v " + nick
    self.session.send("MODE %s %s" % (self.name, change))
    self.requests.add("MODE", stanza, change)
def validate_and_get_entry(self, entry):
    try:
        # Remove all non-numeric characters, and replace comma with period
        entry_val = re.sub(r"[^0-9,.]", "", entry.get()).replace(",", ".")
        if entry_val == "":
            return False
        # Normalize value to remove all trailing zeros
        value = normalize(entry_val)
        entry.delete(0, "end")
        entry.insert(0, value)
        if value < 0:
            return False
        return value
    except Exception:
        return False
def get_probs_for_uncertain(uncertainset):
    trainset = common.load_data(TRAINSET_PATH, sep=',')
    encoded_uncertainset = common.onehot_encode(uncertainset[:, common.N_DISASTER:], 0)
    encoded_trainset = common.onehot_encode(trainset, 0)
    prob_sums = np.zeros((len(uncertainset), common.N_CLASS))
    for i in range(N_MODEL):
        x_train, _, _, _ = common.split(encoded_trainset, i)
        _, normalized_uncertainset = common.normalize(x_train, encoded_uncertainset)
        prob_sums += tf.keras.models.load_model(
            common.numbering(MODEL_PATH, i)).predict(normalized_uncertainset)
        print(f'{i} is done.')
    return prob_sums / N_MODEL
def __init__(self, node):
    self.node = node
    self.jid = JID(node.prop("jid"))
    servers = node.xpathEval("server")
    self.servers = []
    for s in servers:
        self.servers.append(ServerConfig(s))
    channels = node.xpathEval("channel")
    self.channels = {}
    if channels:
        for c in channels:
            ch = ChannelConfig(c)
            self.channels[normalize(ch.name)] = ch
    self.default_encoding = node.prop("encoding")
    self.nicks_8bit = node.prop("nicks_8bit")
    self.name = node.prop("name")
    self.max_nick_length = int(node.prop("max_nick_length"))
    self.max_channel_length = int(node.prop("max_nick_length"))
    self.password = node.prop("password")
def _process_input(self, input):
    self.__logger.debug("Server message: %r" % (input,))
    split = input.split(" ")
    if split[0].startswith(":"):
        prefix = split[0][1:]
        split = split[1:]
    else:
        prefix = None
    if split:
        command = split[0]
        split = split[1:]
    else:
        command = None
    params = []
    while split:
        if split[0].startswith(":"):
            params.append(string.join(split, " ")[1:])
            break
        params.append(split[0])
        split = split[1:]
    if self.raw_channel:
        self.pass_input_to_raw_channel(prefix, command, params)
    if command and numeric_re.match(command):
        params = params[1:]
    self.lock.release()
    try:
        f = None
        for c in self.channels.keys():
            if params and normalize(params[0]) == c:
                f = getattr(self.channels[c], "irc_cmd_" + command, None)
                if f:
                    break
        if not f:
            f = getattr(self, "irc_cmd_" + command, None)
        if f:
            f(prefix, command, params)
    finally:
        self.lock.acquire()
def whoreply(self, params):
    if params[4] != self.nick:
        return
    if len(params) != 7:
        return
    channel, user, host, server, nick, flags, rest = params
    fullname = rest.split(None, 1)[1]
    if channel and channel != "*":
        channel = self.session.channels.get(normalize(channel))
        if not channel:
            self.__logger.debug("Ignoring WHO reply: %r - unknown channel" % (params,))
            return
    else:
        channel = None
    self.sync_delay += 1
    try:
        self.nick = nick
        self.host = host
        self.user = user
        if channel:
            self.join_channel(channel)
            if "@" in flags:
                channel.set_mode("o", self)
            elif "+" in flags:
                channel.set_mode("v", self)
            else:
                channel.reset_mode("o", self)
                channel.reset_mode("v", self)
        if "G" in flags:
            self.mode["a"] = 1
        else:
            self.mode["a"] = 0
    finally:
        self.sync_delay -= 1
    if channel:
        channel.sync_user(self)
def join_channel(self, channel):
    self.channels[normalize(channel.name)] = channel
    self.sync_in_channel(channel)
def get_channel_config(self, channel):
    return self.channels.get(normalize(channel))
def _read_files(a_crp_files, a_pos, a_neg, a_neut,
                a_pos_re=NONMATCH_RE, a_neg_re=NONMATCH_RE):
    """Read corpus files and populate one-directional co-occurrences.

    @param a_crp_files - files of the original corpus
    @param a_pos - initial set of positive terms to be expanded
    @param a_neg - initial set of negative terms to be expanded
    @param a_neut - initial set of neutral terms to be expanded
    @param a_pos_re - regular expression for matching positive terms
    @param a_neg_re - regular expression for matching negative terms

    @return 2-tuple - training sets of features and their gold classes

    """
    print("Reading corpus...", end="", file=sys.stderr)
    i = 0
    ts_x = []
    ts_y = []
    tweet_toks = set()
    iform = itag = ilemma = prev_lemma = ""
    for ifname in a_crp_files:
        with codecs.open(ifname, 'r', ENCODING) as ifile:
            prev_lemma = ""
            for iline in ifile:
                iline = iline.strip().lower()
                if iline and iline[0] == ESC_CHAR:
                    if FASTMODE:
                        i += 1
                        if i > 300:
                            break
                    _update_ts(ts_x, ts_y, tweet_toks,
                               a_pos, a_neg, a_neut, a_pos_re, a_neg_re)
                    prev_lemma = ""
                    continue
                elif not iline or SENT_END_RE.match(iline):
                    prev_lemma = ""
                    continue
                try:
                    iform, itag, ilemma = TAB_RE.split(iline)
                except:
                    print("Invalid line format at line: {:s}".format(
                        repr(iline)), file=sys.stderr)
                    continue
                ilemma = normalize(ilemma)
                if a_pos_re.search(iform) or a_neg_re.search(iform):
                    tweet_toks.add(iform)
                elif a_pos_re.search(ilemma) or a_neg_re.search(ilemma):
                    tweet_toks.add(ilemma)
                elif itag[:2] not in INFORMATIVE_TAGS \
                        or not check_word(ilemma):
                    continue
                else:
                    tweet_toks.add(ilemma)
                if prev_lemma:
                    tweet_toks.add((prev_lemma, ilemma))
                prev_lemma = ilemma
    _update_ts(ts_x, ts_y, tweet_toks,
               a_pos, a_neg, a_neut, a_pos_re, a_neg_re)
    print(" done", file=sys.stderr)
    return _prune_ts(ts_x, ts_y)
def severyn(a_N, a_crp_files, a_pos, a_neg, a_neut,
            a_pos_re=NONMATCH_RE, a_neg_re=NONMATCH_RE):
    """Method for generating sentiment lexicons using Severyn's approach.

    @param a_N - number of terms to extract
    @param a_crp_files - files of the original corpus
    @param a_pos - initial set of positive terms to be expanded
    @param a_neg - initial set of negative terms to be expanded
    @param a_neut - initial set of neutral terms to be expanded
    @param a_pos_re - regular expression for matching positive terms
    @param a_neg_re - regular expression for matching negative terms

    @return list of terms sorted according to their polarity scores

    """
    a_pos = set(normalize(w) for w in a_pos)
    a_neg = set(normalize(w) for w in a_neg)

    vectorizer = DictVectorizer()
    # model for distinguishing between the subjective and objective classes
    so_clf = LinearSVC(C=0.3)
    so_model = Pipeline([("vectorizer", vectorizer), ("LinearSVC", so_clf)])
    # model for distinguishing between the positive and negative classes
    pn_clf = LinearSVC(C=0.3)
    pn_model = Pipeline([("vectorizer", vectorizer), ("LinearSVC", pn_clf)])

    # generate general training sets
    x_so = []
    y_so = []
    x_pn = []
    y_pn = []
    for x, y in zip(*_read_files(a_crp_files, a_pos, a_neg, a_neut,
                                 a_pos_re, a_neg_re)):
        if y != NEUTRAL:
            y_so.append(SUBJECTIVE)
            x_pn.append(x)
            y_pn.append(y)
        else:
            y_so.append(y)
        x_so.append(x)

    # generate lists of gold labels specific for each task
    so_model.fit(x_so, y_so)
    # check whether the sign of core features is being determined correctly
    so_feat2coef = {}
    coefs = so_clf.coef_[0]
    for f_name, f_score in zip(vectorizer.get_feature_names(), coefs):
        assert f_name not in a_neut or f_score < 0, \
            "Invalid coefficient sign expected for objective features."
        so_feat2coef[f_name] = f_score

    # train positive/negative classifier
    pn_model.fit(x_pn, y_pn)

    # generate actual output
    ret = [(w, POSITIVE, FMAX) for w in a_pos] \
        + [(w, NEGATIVE, FMIN) for w in a_neg]
    coefs = pn_clf.coef_[0]
    for f_name, f_score in zip(vectorizer.get_feature_names(), coefs):
        assert f_name not in a_neg or f_score < 0
        # skip seed terms and terms deemed as objective
        if f_name in a_pos or f_name in a_neg \
                or so_feat2coef.get(f_name, 1) < 0:
            continue
        ret.append((f_name, POSITIVE if f_score > 0. else NEGATIVE,
                    f_score * so_feat2coef.get(f_name, 1.)))
    ret.sort(key=lambda el: abs(el[-1]), reverse=True)
    if a_N >= 0:
        del ret[a_N:]
    return ret
def leave_channel(self, channel, status=None):
    try:
        del self.channels[normalize(channel.name)]
        self.sync_in_channel(channel, status=status)
    except KeyError:
        pass
def check_nick(self, nick):
    nick = nick.encode(self.default_encoding)
    if normalize(nick) == normalize(self.nick):
        return 1
    else:
        return 0
def check_prefix(self, prefix):
    if "!" in prefix:
        nick = prefix.split("!", 1)[0]
    else:
        nick = prefix
    return normalize(nick) == normalize(self.nick)
def __init__(self, environment):
    self.env = environment
    self.do_keep_prob = tf.placeholder("float", shape=(), name='do_keep_prob')

    self.forward_model = __import__('forward_model').ForwardModel(
        state_size=self.env.state_size,
        action_size=self.env.action_size,
        rho=self.env.fm_rho,
        beta=self.env.fm_beta,
        encoding_size=self.env.fm_encoding_size,
        batch_size=self.env.fm_batch_size,
        multi_layered_encoder=self.env.fm_multi_layered_encoder,
        num_steps=self.env.fm_num_steps,
        separate_encoders=self.env.fm_separate_encoders,
        merger=self.env.fm_merger,
        activation=self.env.fm_activation,
        lstm=self.env.fm_lstm,
        dropout_keep=self.env.do_keep_prob)

    autoencoder = None
    transformed_state_size = self.env.state_size

    self.discriminator = __import__('discriminator').DISCRIMINATOR(
        in_dim=transformed_state_size + self.env.action_size,
        out_dim=2,
        size=self.env.d_size,
        lr=self.env.d_lr,
        do_keep_prob=self.do_keep_prob,
        weight_decay=self.env.weight_decay)

    self.policy = __import__('policy').POLICY(
        in_dim=transformed_state_size,
        out_dim=self.env.action_size,
        size=self.env.p_size,
        lr=self.env.p_lr,
        w_std=self.env.w_std,
        do_keep_prob=self.do_keep_prob,
        n_accum_steps=self.env.policy_accum_steps,
        weight_decay=self.env.weight_decay)

    # self.policy_ = __import__('policy').POLICY(in_dim=transformed_state_size,
    #                                            out_dim=self.env.action_size,
    #                                            size=self.env.p_size,
    #                                            lr=self.env.p_lr,
    #                                            w_std=self.env.w_std,
    #                                            do_keep_prob=self.do_keep_prob,
    #                                            n_accum_steps=self.env.policy_accum_steps,
    #                                            weight_decay=self.env.weight_decay)

    self.er_agent = ER(memory_size=self.env.er_agent_size,
                       state_dim=self.env.state_size,
                       action_dim=self.env.action_size,
                       reward_dim=1,  # stub connection
                       qpos_dim=self.env.qpos_size,
                       qvel_dim=self.env.qvel_size,
                       batch_size=self.env.batch_size,
                       history_length=1)

    self.er_expert = common.load_er(fname=self.env.run_dir + self.env.expert_data,
                                    batch_size=self.env.batch_size,
                                    history_length=1,
                                    traj_length=2)

    self.env.sigma = self.er_expert.actions_std / self.env.noise_intensity

    self.states_ = tf.placeholder("float", shape=(None, self.env.state_size), name='states_')  # Batch x State
    self.states = tf.placeholder("float", shape=(None, self.env.state_size), name='states')    # Batch x State
    self.actions = tf.placeholder("float", shape=(None, self.env.action_size), name='action')  # Batch x Action
    self.label = tf.placeholder("float", shape=(None, 1), name='label')
    self.gamma = tf.placeholder("float", shape=(), name='gamma')
    self.temp = tf.placeholder("float", shape=(), name='temperature')
    self.noise = tf.placeholder("float", shape=(), name='noise_flag')
    self.noise_mean = tf.placeholder("float", shape=(self.env.action_size))

    states_ = common.normalize(self.states_, self.er_expert.states_mean, self.er_expert.states_std)
    states = common.normalize(self.states, self.er_expert.states_mean, self.er_expert.states_std)
    if self.env.continuous_actions:
        actions = common.normalize(self.actions, self.er_expert.actions_mean, self.er_expert.actions_std)
    else:
        actions = self.actions

    self.forward_model.states_normalizer = self.er_expert.states_max - self.er_expert.states_min
    self.forward_model.actions_normalizer = self.er_expert.actions_max - self.er_expert.actions_min
    self.forward_model.states_normalizer = self.er_expert.states_std
    self.forward_model.actions_normalizer = self.er_expert.actions_std
    s = np.ones((1, self.forward_model.arch_params['encoding_dim']))

    # 1. Forward Model
    fm_output, _, gru_state = self.forward_model.forward([states_, actions, s])
    l2_loss = tf.reduce_mean(tf.square(states - fm_output))
    self.forward_model.train(objective=l2_loss)

    # 2. Discriminator
    labels = tf.concat(1, [1 - self.label, self.label])
    d = self.discriminator.forward(states, actions, autoencoder)

    # 2.1 0-1 accuracy
    correct_predictions = tf.equal(tf.argmax(d, 1), tf.argmax(labels, 1))
    self.discriminator.acc = tf.reduce_mean(tf.cast(correct_predictions, "float"))
    # 2.2 prediction
    d_cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=d, labels=labels)
    # cost sensitive weighting (weigh true=expert, predict=agent mistakes)
    d_loss_weighted = self.env.cost_sensitive_weight * tf.mul(tf.to_float(tf.equal(tf.squeeze(self.label), 1.)), d_cross_entropy) + \
                      tf.mul(tf.to_float(tf.equal(tf.squeeze(self.label), 0.)), d_cross_entropy)
    discriminator_loss = tf.reduce_mean(d_loss_weighted)
    self.discriminator.train(objective=discriminator_loss)
    self.discriminator.acc_summary = tf.scalar_summary('acc_d', self.discriminator.acc)

    # 3. Collect experience
    mu = self.policy.forward(states, autoencoder)
    if self.env.continuous_actions:
        a = common.denormalize(mu, self.er_expert.actions_mean, self.er_expert.actions_std)
        eta = tf.random_normal(shape=tf.shape(a), stddev=self.env.sigma, mean=self.noise_mean)
        self.action_test = tf.squeeze(a + self.noise * eta)
    else:
        a = common.gumbel_softmax(logits=mu, temperature=self.temp)
        self.action_test = tf.argmax(a, dimension=1)

    # 4. Policy
    # 4.1 SL
    actions_a = self.policy.forward(states, autoencoder)
    policy_sl_loss = tf.nn.l2_loss(actions_a - actions)  # action == expert action
    self.policy.train(objective=policy_sl_loss, mode='sl')

    # 4.2 Temporal Regularization
    actions_a_ = self.policy_.forward(states, autoencoder)
    policy_tr_loss = self.env.policy_tr_w * self.env.policy_accum_steps * tf.nn.l2_loss(actions_a - actions_a_)
    self.policy.train(objective=policy_tr_loss, mode='tr')

    # op for copying weights from policy to policy_
    self.policy_.copy_weights(self.policy.weights, self.policy.biases)

    # Plain adversarial learning
    d = self.discriminator.forward(states, actions_a, autoencoder)
    policy_alr_loss = self.al_loss(d)
    self.policy.train(objective=policy_alr_loss, mode='alr')

    # 4.3 AL
    def policy_loop(state_, t, total_cost, total_trans_err, _):
        mu = self.policy.forward(state_, autoencoder)

        if self.env.continuous_actions:
            eta = self.env.sigma * tf.random_normal(shape=tf.shape(mu), mean=self.noise_mean)
            a = mu + eta
        else:
            a = common.gumbel_softmax_sample(logits=mu, temperature=self.temp)

        # minimize the gap between agent logit (d[:, 0]) and expert logit (d[:, 1])
        d = self.discriminator.forward(state_, a, autoencoder)
        cost = self.al_loss(d)

        # add step cost
        total_cost += tf.mul(tf.pow(self.gamma, t), cost)

        # get next state
        if self.env.continuous_actions:
            a_sim = common.denormalize(a, self.er_expert.actions_mean, self.er_expert.actions_std)
        else:
            a_sim = tf.argmax(a, dimension=1)

        state_env, _, env_term_sig = self.env.step(a_sim, mode='tensorflow')[:3]
        state_e = common.normalize(state_env, self.er_expert.states_mean, self.er_expert.states_std)
        state_e = tf.stop_gradient(state_e)

        state_a, _, _ = self.forward_model.forward([state_, a, s])

        state, nu = common.re_parametrization(state_e=state_e, state_a=state_a)
        total_trans_err += tf.reduce_mean(abs(nu))
        t += 1

        return state, t, total_cost, total_trans_err, env_term_sig

    def policy_stop_condition(state_, t, cost, trans_err, env_term_sig):
        cond = tf.logical_not(env_term_sig)
        cond = tf.logical_and(cond, t < self.env.n_steps_train)
        cond = tf.logical_and(cond, trans_err < self.env.total_trans_err_allowed)
        return cond

    state_0 = tf.slice(states, [0, 0], [1, -1])
    loop_outputs = tf.while_loop(policy_stop_condition, policy_loop,
                                 [state_0, 0., 0., 0., False])
    self.policy.train(objective=loop_outputs[2], mode='al')
def _read_files(a_stat, a_crp_files, a_pos, a_neg, a_neut,
                a_pos_re=NONMATCH_RE, a_neg_re=NONMATCH_RE):
    """Read corpus files and populate one-directional co-occurrences.

    @param a_stat - statistics on term occurrences
    @param a_crp_files - files of the original corpus
    @param a_pos - initial set of positive terms to be expanded
    @param a_neg - initial set of negative terms to be expanded
    @param a_neut - initial set of neutral terms to be expanded
    @param a_pos_re - regular expression for matching positive terms
    @param a_neg_re - regular expression for matching negative terms

    @return 2-tuple - number of positive and number of negative tweets

    @note modifies `a_stat' in place

    """
    print("Reading corpus...", end="", file=sys.stderr)
    i = 0
    iform = itag = ilemma = ""
    tlemmas = set()
    tweet_stat = [0, 0, 0]
    seeds = a_pos | a_neg | a_neut
    for ifname in a_crp_files:
        with codecs.open(ifname, 'r', ENCODING) as ifile:
            for iline in ifile:
                iline = iline.strip().lower()
                if iline and iline[0] == ESC_CHAR:
                    if FASTMODE:
                        i += 1
                        if i > 300:
                            break
                    _update_stat(a_stat, tweet_stat, tlemmas,
                                 a_pos, a_neg, a_neut, a_pos_re, a_neg_re)
                    continue
                elif not iline or SENT_END_RE.match(iline):
                    continue
                try:
                    iform, itag, ilemma = TAB_RE.split(iline)
                except:
                    print("Invalid line format at line: {:s}".format(
                        repr(iline)), file=sys.stderr)
                    continue
                ilemma = normalize(ilemma)
                if a_pos_re.search(iform) or a_neg_re.search(iform) \
                        or iform in seeds:
                    tlemmas.add(iform)
                elif a_pos_re.search(ilemma) or a_neg_re.search(ilemma) \
                        or ilemma in seeds:
                    tlemmas.add(ilemma)
                elif itag[:2] not in INFORMATIVE_TAGS \
                        or not check_word(ilemma):
                    continue
                else:
                    tlemmas.add(ilemma)
    _update_stat(a_stat, tweet_stat, tlemmas,
                 a_pos, a_neg, a_neut, a_pos_re, a_neg_re)
    print(" done", file=sys.stderr)
    # remove words with fewer occurrences than the minimum threshold
    _prune_stat(a_stat)
    return tweet_stat
def register_user(self, user):
    self.lock.acquire()
    try:
        self.users[normalize(user.nick)] = user
    finally:
        self.lock.release()