def seg(file):
    """Crop contoured regions out of ``<file>.jpg`` and extract features from each.

    The image is Otsu-thresholded (inverted binary) and contours are taken
    from that mask.  Every contour whose bounding box is wider and taller
    than 25 px is written to ``./str/<file><idx>.jpg`` and then passed to
    ``feature.feature("./str/<file><idx>", 0)``.

    Args:
        file: Image path without the ``.jpg`` extension.
    """
    img = cv2.imread(file + ".jpg")
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    border, thresholdedValue = cv2.threshold(
        gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)

    # noise removal
    oneArray = np.ones((3, 3), np.uint8)
    opening = cv2.morphologyEx(thresholdedValue, cv2.MORPH_OPEN, oneArray,
                               iterations=2)

    # sure background area
    bg = cv2.dilate(opening, oneArray, iterations=3)

    # sure foreground area
    distance = cv2.distanceTransform(opening, cv2.DIST_L2, 5)
    border, fg = cv2.threshold(distance, 0.7 * distance.max(), 255, 0)

    # region that is neither sure background nor sure foreground
    fg = np.uint8(fg)
    bgFg = cv2.subtract(bg, fg)

    # marker labelling; shift labels so that sure background is 1, not 0,
    # then mark the undecided region with 0
    border, markers = cv2.connectedComponents(fg)
    markers = markers + 1
    markers[bgFg == 255] = 0

    markers = cv2.watershed(img, markers)
    img1 = img.copy()
    img1[markers == -1] = [255, 255, 255]
    img1[markers != -1] = [0, 0, 0]
    img1 = cv2.cvtColor(img1, cv2.COLOR_BGR2GRAY)
    # NOTE: this watershed edge map is not used by the contour search below,
    # which works on the Otsu mask.
    edged = cv2.Canny(img1, 10, 250)

    # findContours returns (image, contours, hierarchy) on OpenCV 3.x but
    # (contours, hierarchy) on 2.x and 4.x; the contours are always the
    # second-to-last element.
    contours = cv2.findContours(thresholdedValue, cv2.RETR_TREE,
                                cv2.CHAIN_APPROX_SIMPLE)[-2]

    idx = 0
    for c in contours:
        x, y, w, h = cv2.boundingRect(c)
        if w > 25 and h > 25:
            idx += 1
            new_img = img[y:y + h, x:x + w]
            cv2.imwrite("./str/" + file + str(idx) + '.jpg', new_img)
            feature.feature("./str/" + file + str(idx), 0)
def count_occurances(data, feature_index, feature_vec):
    """Tally base occurrences at one position and append a feature object.

    Args:
        data (list): records exposing ``features`` (indexable) and
            ``promoter`` (compared by identity against True / False).
        feature_index: index into each record's ``features``.
        feature_vec: list that receives the newly built feature object.

    Returns:
        none
    """
    # split the bases at this position by promoter flag
    positives = []
    negatives = []
    for record in data:
        if record.promoter is True:
            positives.append(record.features[feature_index])
        elif record.promoter is False:
            negatives.append(record.features[feature_index])

    def totals(base):
        # (count in positives, count in negatives, combined count)
        in_pos = positives.count(base)
        in_neg = negatives.count(base)
        return (in_pos, in_neg, in_pos + in_neg)

    built = feature(
        len(positives) + len(negatives),
        totals('a'),
        totals('c'),
        totals('g'),
        totals('t'),
        len(positives),
        len(negatives),
        feature_index)
    feature_vec.append(built)
def count_occurances(data, feature_index, feature_vec):
    """Count how often each base occurs at ``feature_index`` and record it.

    Builds one feature object from the per-base totals and appends it to
    ``feature_vec``.

    Args:
        data (list): records exposing ``features`` and ``promoter``.
        feature_index: position of the base to inspect in each record.
        feature_vec: list that holds all feature objects (appended to).

    Returns:
        none
    """
    # bases of the promoter (True) and non-promoter (False) records
    pos = [rec.features[feature_index] for rec in data
           if rec.promoter is True]
    neg = [rec.features[feature_index] for rec in data
           if rec.promoter is False]

    # per base: (positive count, negative count, total)
    per_base = []
    for base in ('a', 'c', 'g', 't'):
        n_pos = pos.count(base)
        n_neg = neg.count(base)
        per_base.append((n_pos, n_neg, n_pos + n_neg))
    base_a, base_c, base_g, base_t = per_base

    n_positive = len(pos)
    n_negative = len(neg)
    feature_vec.append(
        feature(n_positive + n_negative, base_a, base_c, base_g, base_t,
                n_positive, n_negative, feature_index))
def register(self, name, version, desc, feature_class):
    """Register a feature under the key ``(name.lower(), version)``.

    Returns:
        True when the feature was stored; False (after printing a notice)
        when an entry with the same key already exists.
    """
    key = (name.lower(), version)
    if key not in self._features:
        self._features[key] = feature(name, version, desc, feature_class)
        return True
    print('Feature already present!')
    return False
def get_state_reward(self, fw):
    """Compute the state reward.

    Starts from a zero-valued feature and accumulates, in order: weighted
    lane gaussians, weighted fence terms (sigmoid or gaussian), the
    optional speed term, the weighted other-car terms and the weighted
    other-truck sigmoids.
    """
    total = feature.feature(lambda t, x, u: 0.0)

    # lanes: the gaussian width depends on whether this car is human
    for lane, w_lane in zip(self.world.lanes, self.w_lanes):
        stdev = (constants.LANE_REWARD_STDEV_h if self.is_human
                 else constants.LANE_REWARD_STDEV_r)
        total += w_lane * lane.gaussian(fw=fw, stdev=stdev)

    # fences: sigmoid or gaussian-shaped reward
    for fence, w_fence in zip(self.world.fences, self.w_fences):
        shape = fence.sigmoid(fw=fw) if self.fence_sigmoid else fence.gaussian(fw=fw)
        total += w_fence * shape

    if self.speed is not None:
        total += self.w_speed * feature.speed(self.speed)

    # other cars: optionally add the "not behind" term
    for traj, w_traj in zip(self.other_car_trajs, self.w_other_car_trajs):
        term = w_traj * traj.gaussian(fw, length=.14, width=.03)
        if not self.fine_behind:
            term = term + traj.not_behind(fw, self.w_behind)
        total += term

    for truck_traj, w_truck in zip(self.other_truck_trajs,
                                   self.w_other_truck_trajs):
        total += w_truck * truck_traj.sigmoid(fw)

    return total
def data_preprocess(file_path, voc):
    """Build feature vectors and labels from a CSV listing of images.

    Each CSV row is read as ``row[0]`` = image reference handed to
    ``feature.feature(row[0], voc)`` and ``row[1]`` = integer label.  Rows
    whose extraction result is not a NumPy array are skipped.

    Args:
        file_path: the file that stores the image info (CSV).
        voc: passed through unchanged to ``feature.feature``.

    Returns:
        ``(res_feature, res_label)`` — two parallel lists.
    """
    import sys

    # csv.reader needs a binary file on Python 2 and a text file opened
    # with newline='' on Python 3.
    if sys.version_info[0] >= 3:
        csv_file = open(file_path, "r", newline="")
    else:
        csv_file = open(file_path, "rb")

    res_feature = []
    res_label = []
    with csv_file:
        for row in csv.reader(csv_file):
            print(row[0])
            res = feature.feature(row[0], voc)
            if not isinstance(res, np.ndarray):
                continue
            # reuse the result instead of extracting the feature a second time
            res_feature.append(res)
            print(len(res_feature))
            res_label.append(int(row[1]))
    return res_feature, res_label
def Handle_cur(self, N=50, size=51):
    """Pick a result for the current frame, then load the next frame.

    For ``N`` lags between 1 and 100 a normalised autocorrelation of the
    buffered ``size x size`` patches is computed per colour channel and
    the per-pixel maximum over the channels is kept.  A degree-2
    polynomial is fitted over the lags for every pixel; the mean of the
    constant coefficients decides which of the two stored results for the
    current frame is returned (first when the mean exceeds 0.98, second
    otherwise).  The current slot is then refilled from the capture.

    Args:
        N: number of lag values sampled from ``np.linspace(1, 100, N)``.
        size: edge length of the square patch; must match the patch size
            of ``self.frames``.

    Returns:
        ``self.res[self.cur][0]`` or ``self.res[self.cur][1]`` as stored
        before the slot is refilled.
    """
    # np.int was only an alias of the builtin int and no longer exists in
    # current NumPy releases.
    K = np.array(np.linspace(1, 100, N), dtype=int)
    res = np.zeros((N, size, size))
    count = 0
    for k in K:
        # one autocorrelation map per colour channel
        temp = np.zeros((size, size, 3))
        for i in range(3):
            # NOTE(review): builtin sum() accumulates in the dtype of
            # self.frames; if that is uint8 the sum presumably wraps
            # around - confirm before relying on `ave`.
            ave = sum(self.frames[:, :, :, i]) / float(self.length)
            up = np.zeros((size, size))
            down = np.zeros((size, size))
            for j in range(self.cur, self.cur + self.length - k, 1):
                up += ((self.frames[(j + k) % self.length, :, :, i] - ave) *
                       (self.frames[j % self.length, :, :, i] - ave))
            for j in range(self.cur, self.cur + self.length, 1):
                down += (self.frames[j % self.length, :, :, i] - ave) ** 2
            temp[:, :, i] = up / down
        res[count] = np.amax(temp, axis=2)
        count += 1

    # Constant term of the per-pixel quadratic fit.  Non-finite fits are
    # dropped; the original `r[count] == 0` was a comparison with no
    # effect, so a trailing NaN/inf could leak into the mean.
    valid = []
    for i in range(size):
        for j in range(size):
            const = np.polyfit(K, res[:, i, j], 2)[2]
            if np.isfinite(const):
                valid.append(const)
    # With no usable fit the score is NaN, which selects the second result.
    score = sum(valid) / float(len(valid)) if valid else float('nan')
    print('%s %s' % (score, self.res[self.cur]))
    if score > 0.98:
        res = self.res[self.cur][0]
    else:
        res = self.res[self.cur][1]

    # read the next frame into the current slot
    ret, frame = self.cap.read()
    if ret:
        x, y, z = frame.shape
        # floor division keeps the slice bounds integral
        self.frames[self.cur] = frame[x // 2:x // 2 + size,
                                      y // 2:y // 2 + size]
        im = feature.feature(frame)
        self.res[self.cur] = [im.svmclassify(), im.get_patch()]
    else:
        self.length -= 1
    self.cur += 1
    self.cur %= 100
    return res
def hsv(file):
    """Crop regions of ``<file>.jpg`` that fall inside a fixed HSV range.

    The image is converted to HSV, masked to the range
    ``(1, 190, 200)``-``(18, 255, 255)``, edge-detected with Canny, and
    every contour whose bounding box is wider and taller than 50 px is
    written to ``./hsv/<file><idx>.jpg`` and then passed to
    ``feature.feature("./hsv/<file><idx>", 0)``.

    Args:
        file: Image path without the ``.jpg`` extension.
    """
    img = cv2.imread(file + ".jpg")
    hsv_img = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
    lower = (1, 190, 200)
    upper = (18, 255, 255)  # previously named `sum`, shadowing the builtin
    mask = cv2.inRange(hsv_img, lower, upper)
    edged = cv2.Canny(mask, 10, 250)

    # findContours returns (image, contours, hierarchy) on OpenCV 3.x but
    # (contours, hierarchy) on 2.x and 4.x; the contours are always the
    # second-to-last element.
    contours = cv2.findContours(edged, cv2.RETR_TREE,
                                cv2.CHAIN_APPROX_SIMPLE)[-2]

    idx = 0
    for c in contours:
        x, y, w, h = cv2.boundingRect(c)
        if w > 50 and h > 50:
            idx += 1
            new_img = img[y:y + h, x:x + w]
            cv2.imwrite("./hsv/" + file + str(idx) + '.jpg', new_img)
            feature.feature("./hsv/" + file + str(idx), 0)
def load_data(self):
    """Load the raw training set into ``self.features`` / ``self.targets``.

    Every line of ``raw_trainingset`` is parsed with
    ``gen_training_corpus``; its title and the two persons are turned into
    a feature vector and its label becomes the target.
    """
    features = []
    targets = []
    # the context manager closes the file even if parsing a line raises
    with open(raw_trainingset, 'r') as f_train:
        for line in f_train:
            corpus = gen_training_corpus(line)
            feat = feature(corpus.title, corpus.person1, corpus.person2)
            features.append(np.array(feat))
            targets.append(corpus.label)
    self.features = np.array(features)
    self.targets = np.array(targets)
def train(self, train_target, train_samples):
    """Train one binary logistic-regression tagger per class.

    The classes are ``self.keyword_detection_list`` when it is non-empty,
    otherwise every label occurring in ``train_target``.  For each class
    the samples are balanced with ``load.split_equally`` and a classifier
    is fitted on the features produced by ``self.bow``; the classifiers
    are stored in ``self.classifyers``.  Finally a tf-idf matrix over
    element 1 of every sample is stored, transposed, in ``self.tfidfVec``.

    Args:
        train_target: per-sample collections of labels.
        train_samples: the samples; element 1 of each is used for tf-idf.
    """
    self._prepared = False
    self.tfidf = TfidfVectorizer(stop_words='english')
    # title and description
    # NOTE(review): this indexes samples 1 and 2 rather than fields of a
    # sample, and the vectorizer is fitted again below - confirm intent.
    self.tfidf.fit_transform(train_samples[1] + train_samples[2])

    # Classifier Model
    self.classifyers = {}
    if self.keyword_detection_list:
        classes = self.keyword_detection_list
    else:
        classes = set(label for labels in train_target for label in labels)
    print('%s %s' % ('Total number of classes for this model ', len(classes)))

    class_example_count = [
        sum([1 if label in labels else 0 for labels in train_target])
        for label in classes
    ]
    print('%s %s' % ('examples seen for each class during training ',
                     class_example_count))

    self.bow = feature.feature(self.featurename, train_samples,
                               keywords=self.keyword_detection_list)

    # Classifier Model : Train
    for label in classes:
        # Balancing dataset
        target_y = [1 if label in labels else 0 for labels in train_target]
        target_y_balanced, train_balanced = load.split_equally(
            target_y, train_samples)
        print('Training to tag %s from %d samples' %
              (label, len(target_y_balanced)))

        Y = np.array(target_y_balanced)
        X = self.bow.get_incremental_features(train_balanced)
        assert (X.shape[0] == len(train_balanced))
        assert (Y.shape[0] == len(train_balanced))

        model = LogisticRegression()
        model.fit(X, Y)
        self.classifyers[label] = model

    self.train_target = train_target

    documents = [sample[1] for sample in train_samples]
    print('tfidf ..')
    self.tfidfVec = self.tfidf.fit_transform(documents)
    self.tfidfVec = self.tfidfVec.transpose()
    print(self.tfidfVec.shape)
    self._prepared = True
def generateTrainingData(self, err, localCursor, query, sorted_resource_jobs, sorted_tool_jobs):
    """Build feature vectors for one error type and dump them as pickles.

    Executes ``query`` with ``err`` as its parameter, turns every returned
    job row that has a submission date into a feature vector, and writes
    the vectors (X), the FINISHED/not-FINISHED labels (Y) and the
    submission dates (D) into the directories ``<datadir><err>X``,
    ``...Y`` and ``...D`` in chunks of at most 50000 rows per file.
    Existing directories of those names are removed first.

    Args:
        err: error identifier; query parameter and part of the output
            directory names.
        localCursor: database cursor used to run ``query``.
        query: SQL text with one ``%s`` placeholder.
        sorted_resource_jobs: passed through to ``self._lastJobResult``.
        sorted_tool_jobs: passed through to ``self._lastJobResult``.
    """
    print(query % err)
    # parameters are passed as a real one-element tuple; `(err)` is just
    # the bare string
    localCursor.execute(query, (err,))
    X, Y, D = [], [], []
    count = 0
    for (jid, res, tool, date_submitted, name) in localCursor:
        if not date_submitted:
            continue
        count += 1

        # one-hot encodings of the resource and the tool
        res_feat = [0] * len(self.resources)
        res_feat[self.resource_no[res]] = 1
        test_feat = self._testFeatures(date_submitted)
        tool_feat = [0] * (len(self.tools.keys()))
        tool_feat[self.tools[tool]] = 1

        lastJobs, lastToolJob, jobs_since_feat = self._lastJobResult(
            sorted_resource_jobs, res, sorted_tool_jobs, self.tools[tool],
            date_submitted)
        feat = feature(jid, res_feat, tool_feat, test_feat, [], lastJobs)

        X.append(feat.toList())
        Y.append(int(name == 'FINISHED'))
        D.append(date_submitted)
        if count % 1000 == 0:
            sys.stdout.write("Created %d feature vectors.\n" % count)

    # list() keeps the concatenation valid when keys() returns a view
    feat_names = ((['resource'] * len(self.resources)) +
                  list(self.tools.keys()) + list(self.tests.keys()) +
                  (['last_job'] * self.limit))
    sys.stdout.write("Created %d feature vectors.\n" % count)

    XPath = self.datadir + err + 'X'
    YPath = self.datadir + err + 'Y'
    DPath = self.datadir + err + 'D'
    for stale in (XPath, YPath, DPath):
        if os.path.isdir(stale):
            shutil.rmtree(stale)

    if len(Y) > 0:
        print("Dumping features of type %s" % err)
        # create fresh folders, then write the data in 50000-row chunks
        os.mkdir(XPath)
        os.mkdir(YPath)
        os.mkdir(DPath)
        beg = 0
        count = 1
        while len(Y) > beg:
            end = min(50000 * count, len(Y))
            for folder, rows in ((XPath, X), (YPath, Y), (DPath, D)):
                # `with` closes every chunk file; the handles were
                # previously never closed
                with open(folder + '/' + str(count) + '.pkl', 'wb') as out:
                    pickle.dump(rows[beg:end], out)
            beg = end
            count += 1

        # NOTE(review): confirm the feature names are meant to be written
        # only when at least one sample was dumped.
        with open(self.datadir + 'feat_names.pkl', 'wb') as out:
            pickle.dump(feat_names, out)
def generate_data(self):
    """Extract patches for one fold split and pickle them as a data frame.

    Reads the metadata CSV at ``self.metadata_path`` and keeps the rows
    whose ``fold`` differs from ``self.fold`` (training) or equals it
    (validation).  A patch is extracted for every remaining row with
    ``feature.feature(track_id, event_start)``; the successful ones are
    collected into a shuffled data frame with the columns ``track_id``,
    ``category`` (integer class index) and ``patch``.

    Sets ``self.data_frame`` and ``self.num_class`` and writes the data
    frame to ``self.data_path`` with pickle.
    """
    # meta-data
    meta_df = pd.read_csv(self.metadata_path)

    # Generate training data or validation data?
    if self.is_training:
        meta_df = meta_df[meta_df['fold'] != self.fold]
    else:
        meta_df = meta_df[meta_df['fold'] == self.fold]

    # Map every label present in the selected rows to its index in sorted
    # order
    label_dict = {
        k: v
        for v, k in enumerate(
            sorted(set(meta_df[self.label_column_name].values)))
    }

    tid_append = []  # Audio track ID
    class_append = []  # Class
    patch_append = []  # Patch

    for _, row in meta_df.iterrows():
        tid = row['track_id']
        label = row[self.label_column_name]
        event_start = row['event_start']

        # Extract patch; rows whose extraction fails are dropped
        result, patch = feature.feature(tid, event_start)
        if result:
            tid_append.append(tid)
            class_append.append(label_dict.get(label))
            patch_append.append(patch)
            print('successfully extracted patch : {}'.format(tid))

    # Write appended arrays into a data frame
    df = pd.DataFrame()
    df['track_id'] = tid_append
    df['category'] = class_append
    df['patch'] = patch_append

    # Shuffle rows (for better training)
    df = df.iloc[np.random.permutation(len(df))]

    self.data_frame = df
    self.num_class = len(label_dict)

    # Save "data_frame" as .p (pickle); `with` closes the file, which was
    # previously left open
    with open(self.data_path, "wb") as out:
        pickle.dump(self.data_frame, out)
def tran(root_path, image, image_name):
    """Recursively collect features of all ``.jpg`` files under a directory.

    Sub-directories are visited depth-first in ``os.listdir`` order.  For
    every ``.jpg`` file whose ``feature.feature(path)`` result is a NumPy
    array, the result is appended to ``image`` and the path to
    ``image_name``.

    Args:
        root_path: directory to scan.
        image: list extended in place with the feature arrays.
        image_name: list extended in place with the matching file paths.

    Returns:
        The same ``(image, image_name)`` lists that were passed in.
    """
    for entry in os.listdir(root_path):
        path = os.path.join(root_path, entry)
        if os.path.isdir(path):
            tran(path, image, image_name)
            continue
        if not path.endswith('.jpg'):
            continue
        extracted = feature.feature(path)
        if isinstance(extracted, np.ndarray):
            image.append(extracted)
            image_name.append(path)
    return image, image_name
def __init__(self, name, size=51):
    """Open the video and pre-load up to 100 frames with their results.

    For every pre-loaded frame a ``size x size`` patch taken from the
    middle of the frame is stored in ``self.frames`` and the pair
    ``[svmclassify(), get_patch()]`` is stored in ``self.res``.

    Args:
        name: video source handed to ``cv2.VideoCapture``.
        size: edge length of the square patch kept from every frame.
    """
    self.cap = cv2.VideoCapture(name)
    self.capbak = cv2.VideoCapture(name)
    # for rain/snow take the first result, otherwise the second
    self.res = [[0, 0] for i in range(100)]

    # take the first 100 frames, compute the candidate results and pick
    # one later using the surrounding context
    ret, frame = self.cap.read()
    x, y, z = frame.shape
    self.x = y
    self.y = x
    self.frames = np.zeros((100, size, size, 3), dtype=frame.dtype)

    # take a size*size patch from the middle of the frame; floor division
    # keeps the slice bounds integral
    self.frames[0] = frame[x // 2:x // 2 + size, y // 2:y // 2 + size]
    im = feature.feature(frame)
    self.res[0] = [im.svmclassify(), im.get_patch()]
    self.length = 1
    while self.length < 100 and self.cap.isOpened():
        ret, frame = self.cap.read()
        if not ret:
            # The stream ended before 100 frames were read.  Without this
            # break the loop would spin forever, because isOpened() stays
            # true and self.length no longer grows.
            break
        self.frames[self.length] = frame[x // 2:x // 2 + size,
                                         y // 2:y // 2 + size]
        im = feature.feature(frame)
        self.res[self.length] = [im.svmclassify(), im.get_patch()]
        # number of valid frames
        self.length += 1
        # NOTE(review): confirm this progress line is meant to be printed
        # once per frame.
        print('%s %s' % (self.length, self.res[self.length - 1]))

    # index of the current frame
    self.cur = 0
    self.weathers = {'fog': 0, 'snow': 0, 'rain': 0, 'sunny': 0}
    self.queue = Queue.Queue(10)
def train(self, train_target, train_samples):
    """Fit a single multinomial logistic-regression tagger.

    Every keyword in ``self.keyword_detection_list`` gets its list index
    as class id; every other label seen in ``train_target`` shares one
    extra id equal to the original keyword count.  A sample with several
    labels is duplicated once per additional label.

    Side effects: appends ``''`` to ``self.keyword_detection_list`` and
    extends ``train_samples`` in place with the duplicated samples.

    Args:
        train_target: per-sample sequences of labels (first label is the
            primary one).
        train_samples: the samples, aligned with ``train_target``.
    """
    self.classifyers = {}
    catch_all = len(self.keyword_detection_list)

    # every observed label starts out in the catch-all class ...
    self.classes = {}
    for labels in train_target:
        for label in labels:
            self.classes[label] = catch_all
    self.keywordname = [0] * len(self.keyword_detection_list)
    # ... and the known keywords then take their own index
    for index, keyword in enumerate(self.keyword_detection_list):
        self.classes[keyword] = index
    self.keyword_detection_list.append('')

    Y = [0] * len(train_samples)
    extra_samples = []
    extra_targets = []
    for position, labels in enumerate(train_target):
        primary = self.classes[labels[0]]
        assert (primary < len(self.keyword_detection_list))
        Y[position] = primary
        # one duplicated sample per additional label
        for other in labels[1:]:
            extra_targets.append(self.classes[other])
            extra_samples.append(train_samples[position])
    train_samples.extend(extra_samples)
    Y.extend(extra_targets)
    print(Y)

    self.bow = feature.feature(self.featurename,
                               train_samples,
                               keywords=self.keyword_detection_list)
    self.clf = LogisticRegression(solver='lbfgs',
                                  warm_start=True,
                                  multi_class='multinomial')
    X = self.bow.get_incremental_features(train_samples, Train=True)
    self.clf.fit(X, Y)
    self.train_target = train_target
    self._prepared = True
def train(self, train_target, train_samples):
    """Train the multinomial keyword classifier on the given samples.

    Class ids: keywords of ``self.keyword_detection_list`` map to their
    position; any other label maps to the number of keywords (a shared
    "everything else" class).  Samples carrying more than one label are
    repeated, once for each label after the first.

    Note that ``train_samples`` is extended in place and that ``''`` is
    appended to ``self.keyword_detection_list``.
    """
    self.classifyers = {}
    n_keywords = len(self.keyword_detection_list)

    # default id for all labels, then overwrite with the keyword positions
    class_ids = {}
    for labels in train_target:
        class_ids.update((label, n_keywords) for label in labels)
    self.classes = class_ids
    self.keywordname = [0] * n_keywords
    class_ids.update(
        (keyword, i) for i, keyword in enumerate(self.keyword_detection_list))
    self.keyword_detection_list.append('')

    Y = [0] * len(train_samples)
    repeated = []  # (sample, class id) for every additional label
    for i, labels in enumerate(train_target):
        assert (class_ids[labels[0]] < len(self.keyword_detection_list))
        Y[i] = class_ids[labels[0]]
        for label in labels[1:]:
            repeated.append((train_samples[i], class_ids[label]))
    train_samples.extend([sample for sample, _ in repeated])
    Y.extend([class_id for _, class_id in repeated])
    print(Y)

    self.bow = feature.feature(self.featurename, train_samples,
                               keywords=self.keyword_detection_list)
    self.clf = LogisticRegression(solver='lbfgs', warm_start=True,
                                  multi_class='multinomial')
    features = self.bow.get_incremental_features(train_samples, Train=True)
    self.clf.fit(features, Y)

    self.train_target = train_target
    self._prepared = True
def state_rewards(self, fw):
    """Compute the individual state rewards and return them as a dictionary
    with keys that describe the rewards.

    Keys are ``'lane gaussian <i>'``, ``'fence sigmoid <i>'`` or
    ``'fence gaussian <i>'``, ``'speed'`` (only when ``self.speed`` is
    set), ``'other traj gaussian <i>'``, ``'other traj not behind <i>'``
    (only when ``self.fine_behind`` is false) and
    ``'other truck sigmoid <i>'``.
    """
    rewards = {}

    for i, (lane, w_lane) in enumerate(zip(self.world.lanes, self.w_lanes)):
        if self.is_human:
            lane_gaussian_std = constants.LANE_REWARD_STDEV_h
        else:
            lane_gaussian_std = constants.LANE_REWARD_STDEV_r
        rewards['lane gaussian ' + str(i)] = w_lane * lane.gaussian(
            fw=fw, stdev=lane_gaussian_std)

    for i, (fence, w_fence) in enumerate(
            zip(self.world.fences, self.w_fences)):
        if self.fence_sigmoid:
            # sigmoid fence reward
            rewards['fence sigmoid ' + str(i)] = w_fence * fence.sigmoid(fw=fw)
        else:
            # gaussian-shaped fence reward
            rewards['fence gaussian ' + str(i)] = w_fence * fence.gaussian(fw=fw)

    if self.speed is not None:
        rewards['speed'] = self.w_speed * feature.speed(self.speed)

    # The weight is used as-is: the former is_human branch assigned the
    # same value in both arms.
    for i, (other_car_traj, w) in enumerate(
            zip(self.other_car_trajs, self.w_other_car_trajs)):
        if self.fine_behind:
            # NOTE(review): length=.1 here versus .14 in the other branch
            # - confirm the difference is intended.
            rewards['other traj gaussian ' + str(i)] = (
                w * other_car_traj.gaussian(fw, length=.1, width=.03))
        else:
            rewards['other traj gaussian ' + str(i)] = (
                w * other_car_traj.gaussian(fw, length=.14, width=.03))
            rewards['other traj not behind ' + str(i)] = \
                other_car_traj.not_behind(fw, self.w_behind)

    for i, (other_truck_traj, w_other_truck_traj) in enumerate(
            zip(self.other_truck_trajs, self.w_other_truck_trajs)):
        rewards['other truck sigmoid ' + str(i)] = (
            w_other_truck_traj * other_truck_traj.sigmoid(fw))

    return rewards
def predict_main():
    """Print the most probable relation label for every test line.

    Each line of ``test_file`` is parsed with ``gen_training_corpus``;
    the candidate corpora generated from its title are scored with
    ``classfier.predict_proba`` and the label with the highest probability
    over all candidates is printed together with that probability.
    Lines for which no candidates can be generated are skipped.
    """
    # the context manager closes the file even if a prediction raises
    with open(test_file, 'r') as fi_test:
        for line in fi_test:
            corpus = gen_training_corpus(line)
            test_corpora = gen_test_corpora(corpus.title)
            # identity test: `== None` misbehaves for array-like results
            if test_corpora is None:
                continue
            print('--------------')
            max_proba = 0.0
            pred_label = -1
            # separate loop name so the parsed `corpus` is not shadowed
            for candidate in test_corpora:
                feats = feature(candidate.title, candidate.person1,
                                candidate.person2)
                cls = classfier.predict_proba(feats)
                for i in range(len(relations)):
                    if cls[0][i] > max_proba:
                        max_proba = cls[0][i]
                        pred_label = i
            print('%s %s' % (pred_label, max_proba))
            print('--------------')
def r(self):
    """Classify every frame of the capture and overlay the majority result.

    The per-frame result from ``get_patch()`` is pushed into
    ``self.queue``; ``self.weathers`` counts how often each result occurs
    among the queued entries.  The most frequent result is drawn onto the
    frame.  The loop ends when the capture closes or a read fails.
    """
    ret, self.frame_pre = self.cap.read()
    self.frame_pre = cv2.blur(self.frame_pre, (5, 5))
    while self.cap.isOpened():
        ret, self.frame = self.cap.read()
        if not ret:
            break
        play = self.frame

        # compute the features of this frame
        features = feature.feature(self.frame)
        res = features.get_patch()

        # when the queue is full, forget the oldest result
        if self.queue.full():
            oldest = self.queue.get()
            self.weathers[oldest] -= 1
        self.weathers[res] += 1
        self.queue.put(res)

        # the result seen most often among the queued detections wins;
        # ties go to the first key in dictionary iteration order
        res = max(self.weathers, key=self.weathers.get)

        print('%s %s' % (self.weathers, self.queue.qsize()))
        cv2.putText(play, res, (0, 50), self.font, 4, (255, 255, 255), 2)
def _initializeFeature(self, robotPose, measurement):
    """Create a feature from a measurement taken at the given robot pose.

    Args:
        robotPose: indexable ``(x, y, theta)``.
        measurement: indexable ``(angle, distance)`` relative to the robot.

    Returns:
        ``f.feature(position, cov)`` where ``position`` is the measured
        point offset from the robot position and ``cov`` is the
        measurement noise covariance mapped through the inverse
        measurement Jacobian.
    """
    rx, ry, rTheta = robotPose[0], robotPose[1], robotPose[2]
    mTheta, mDist = measurement[0], measurement[1]

    # Measurement offset added to the robot position.
    heading = mTheta + rTheta
    position = np.array([
        rx + mDist * math.cos(heading),
        ry + mDist * math.sin(heading),
    ])

    jacobian = self._calculateMeasurementJacobian(robotPose, position)
    jacobian_inv = la.inv(jacobian)

    # Transform measurement space covariance to state space.
    noise_times_inv_t = np.dot(self.measurementNoiseCov,
                               jacobian_inv.transpose())
    cov = np.dot(jacobian_inv, noise_times_inv_t)
    return f.feature(position, cov)
def train(c=500, g=5.383):
    """Train a linear C-SVC on the images below ``./images`` and save it.

    Every sub-directory of ``./images`` is one class; the directory name
    is converted with ``np.float32`` and used as the response value.  Each
    image contributes one row of 169 feature values obtained from
    ``feature.feature(path).get_features()``.  The trained model is
    written to ``svm_data.dat``.

    Args:
        c: value of the SVM ``C`` parameter.
        g: value of the SVM ``gamma`` parameter.
    """
    logging.info('train start ...')
    svm_params = dict( kernel_type = cv2.SVM_LINEAR, svm_type = cv2.SVM_C_SVC, C = c, gamma=g )
    # total number of files found below all class directories
    s = 0
    for root, dirs, files in os.walk('./images'):
        # index of the next row to fill
        i = 0
        # first pass: count the files so the arrays can be sized
        for d in dirs:
            for rroot, ddirs, ffiles in os.walk('./images/%s'%d):
                s += len(ffiles)
        # zero-filled (s, 169) feature matrix and (s, 1) response column
        trainData = np.linspace(0.0, 0.0, 169*s)
        trainData.shape = s, 169
        responses = np.linspace(0, 0.0, s)
        responses.shape = s,1
        # second pass: fill one row per file
        for d in dirs:
            for rrroot, dddirs, fffiles in os.walk('./images/%s'%d):
                for f in fffiles:
                    responses[i] = np.float32(d)
                    logging.info('get features : ' + './images/%s/%s' %(d,f))
                    im = feature.feature('./images/%s/%s' %(d,f))
                    trainData[i] = np.float32(im.get_features())
                    i += 1
        # do not enter the other sub-directories again
        # NOTE(review): confirm this break belongs to the outermost walk.
        break
    # gotcha: only the float32 type is supported
    trainData = np.array(trainData, dtype = np.float32)
    responses = np.array(responses, dtype = np.float32)
    svm = cv2.SVM()
    svm.train(trainData, responses, params=svm_params)
    svm.save('svm_data.dat')
    logging.info('train end .data is saved')
def TFmodelBuild(self):
    """Build the 1D+2D residual network, train it and report test accuracy.

    Graph: ``self.block_1d`` 1D residual blocks over the sequential input
    (last dimension 26), conversion to pairwise features, concatenation
    with the pairwise input (last dimension 5), ``self.block_2d`` 2D
    residual blocks, and a 1x1 convolution followed by a softmax over two
    output channels.  Each residual block widens the channel count by
    ``channel_step`` (2).

    Training runs 40 epochs of plain gradient descent (learning rate 0.01)
    with one sample per step, skipping samples whose ``y.shape[-2]``
    exceeds 500.  The model is then saved and top-k accuracies
    (k = 1, 2, 5, 10) are printed for every test sample.

    NOTE(review): confirm the extent of the name scopes and the position
    of the model save against the intended structure.
    """
    # L2 regularizer only when regularisation is switched on
    regularizer = None
    if self.regulation:
        regularizer = tf.contrib.layers.l2_regularizer(scale=0.1)
    # x_seq: sequential input, x_pair: pairwise input, y_: target
    x_seq = tf.placeholder("float", shape=[None, None, 26], name="input_x1")
    x_pair = tf.placeholder("float", shape=[None, None, None, 5], name="input_x2")
    y_ = tf.placeholder("float", shape=None, name="input_y")
    # number of channels added by every residual block
    channel_step = 2
    with tf.name_scope('input_1d'):
        net = x_seq
        print "Input channels = %d" % net.get_shape().as_list()[-1]
        ######## 1d Residual Network ##########
        out_channels = net.get_shape().as_list()[-1]
        for i in xrange(self.block_1d):
            #1D-residual blocks building
            self.block1d_num += 1
            out_channels += channel_step
            net = res_block_1d(net, out_channels, self.filter_size_1d, regularizer, batch_norm=self.BN, name="ResidualBlock_1D_" + str(self.block1d_num))
        #######################################
        print "After conv_1d channels = %d" % net.get_shape().as_list()[-1]
    # Conversion of sequential to pairwise feature
    with tf.name_scope('1d_to_2d'):
        net = seq2pairwise(net)
        # Merge coevolution info(pairwise potential) and above feature
        if self.block_1d == 0:
            # without 1D blocks only the pairwise input is used
            net = x_pair
        else:
            net = tf.concat([net, x_pair], axis=3)
        out_channels = net.get_shape().as_list()[-1]
        print "Add 1d to 2d, channels = %d" % net.get_shape().as_list()[-1]
    ######## 2d Residual Network ##########
    for i in xrange(self.block_2d):
        #2D-residual blocks building
        self.block2d_num += 1
        out_channels += channel_step
        net = res_block_2d(net, out_channels, self.filter_size_2d, regularizer, batch_norm=self.BN, name="ResidualBlock_2D_" + str(self.block2d_num))
    #######################################
    print "After conv_2d channels = %d" % net.get_shape().as_list()[-1]
    # softmax channels of each pair into a score
    with tf.variable_scope('softmax_layer', values=[net]) as scpoe:
        W_out = weight_variable([1, 1, out_channels, 2], regularizer, 'W')
        b_out = bias_variable([2], 'b')
        output_prob = tf.nn.softmax( tf.nn.conv2d(net, W_out, strides=[1, 1, 1, 1], padding='SAME') + b_out)
    with tf.name_scope('loss_function'):
        loss = loss1(output_prob, y_)
        if self.regulation:
            # add the regularisation term collected from the graph
            reg_variables = tf.get_collection( tf.GraphKeys.REGULARIZATION_LOSSES)
            reg_term = tf.contrib.layers.apply_regularization( regularizer, reg_variables)
            loss += reg_term
        tf.summary.scalar('loss', loss)
    with tf.name_scope("training"):
        train_step = tf.train.GradientDescentOptimizer(0.01).minimize(loss)
    saver = tf.train.Saver()
    # register the output under the collection name 'cal_value'
    tf.add_to_collection('cal_value', output_prob)
    init = tf.initialize_all_variables()
    with tf.Session() as sess:
        sess.run(init)
        merged_summary_op = tf.summary.merge_all()
        # summaries go to a directory named after the current minute
        summary_writer = tf.summary.FileWriter( logs_file + time.strftime("%Y-%m-%d-%H-%M", time.localtime()), sess.graph)
        ### Loading database
        F = feature(train_file, valid_file, test_file)
        train_data, valid_data, test_data = F.get_feature()
        X1 = train_data[0]
        X2 = train_data[1]
        Y = train_data[2]
        # global step counter across all epochs
        j = 0
        ### Training
        for epoch in xrange(40):
            # epoch = 40
            # training with one sample in a batch
            for i in xrange(len(X1)):
                # add a leading batch axis of size one
                x1 = X1[i][np.newaxis]
                x2 = X2[i][np.newaxis]
                y = Y[i][np.newaxis]
                # skip samples whose second-to-last dimension exceeds 500
                if y.shape[-2] > 500:
                    continue
                y_onehot = one_hot(y)
                sess.run(train_step, feed_dict={ x_seq: x1, x_pair: x2, y_: y_onehot })
                # every 500 steps: write a summary and print the accuracy
                if j % 500 == 0:
                    summary_str, y_out, train_loss = sess.run( [merged_summary_op, output_prob, loss], feed_dict={ x_seq: x1, x_pair: x2, y_: y_onehot })
                    summary_writer.add_summary(summary_str, j)
                    acc = topKaccuracy(y_out, y, 2)
                    print "training %d, accuracy = %f" % (j + 1, acc[2])
                j += 1
        model_save_path = models_file + time.strftime( "%Y-%m-%d-%H-%M", time.localtime())
        saver.save(sess, model_save_path + "/model.ckpt")
        ### Testing
        X1_test = test_data[0]
        X2_test = test_data[1]
        Y_test = test_data[2]
        for i in xrange(len(X1_test)):
            x1 = X1_test[i][np.newaxis]
            x2 = X2_test[i][np.newaxis]
            y = Y_test[i][np.newaxis]
            y_out = sess.run(output_prob, feed_dict={ x_seq: x1, x_pair: x2 })
            # index 0 / 1 / 2 of each result is printed below as the
            # long / medium / short range accuracy
            acc_k_1 = topKaccuracy(y_out, y, 1)
            acc_k_2 = topKaccuracy(y_out, y, 2)
            acc_k_5 = topKaccuracy(y_out, y, 5)
            acc_k_10 = topKaccuracy(y_out, y, 10)
            print "testing %d:" % (i + 1)
            print "when k = 1:"
            print " long-range accuracy = %f" % acc_k_1[0]
            print " medium-range accuracy = %f" % acc_k_1[1]
            print " short-range accuracy = %f" % acc_k_1[2]
            print "when k = 2:"
            print " long-range accuracy = %f" % acc_k_2[0]
            print " medium-range accuracy = %f" % acc_k_2[1]
            print " short-range accuracy = %f" % acc_k_2[2]
            print "when k = 5:"
            print " long-range accuracy = %f" % acc_k_5[0]
            print " medium-range accuracy = %f" % acc_k_5[1]
            print " short-range accuracy = %f" % acc_k_5[2]
            print "when k = 10:"
            print " long-range accuracy = %f" % acc_k_10[0]
            print " medium-range accuracy = %f" % acc_k_10[1]
            print " short-range accuracy = %f" % acc_k_10[2]
import cv2
import numpy as np
import feature
import sys
import os

# Extract the feature vector of every image below ./test/<d> and save all
# of them, concatenated, to fog2snow<d>test.npy.  <d> is the first
# command-line argument and defaults to '1'.
d = sys.argv[1] if len(sys.argv) > 1 else '1'

chunks = [np.array([])]
for root, dirs, files in os.walk('./test/%s' % d):
    for f in files:
        # join with the walked directory so files in nested folders
        # resolve to their real location
        im = feature.feature(os.path.join(root, f))
        res = im.get_features()
        print(res.shape)
        chunks.append(res)

# a single concatenation instead of re-copying the array for every file
save = np.concatenate(chunks)
np.save('fog2snow%stest.npy' % d, save)
classes = set(classes) print 'Total number of classes for this model ', len(classes) class_example_count = [] for each in classes: Y = [1 if each in x else 0 for x in train_target] class_example_count.append(sum(Y)) #print class_example_count #print len(train_target) #assert(sum(class_example_count) == len(train_target)) print 'examples seen for each class during training ', class_example_count classes = ['python'] #Feature Model if not LOGISTIC_REGRESSION: bow = feature.feature("bow", train_samples) else: bow_trimmed = feature.feature("bow_bigram", train_samples) metric = [] #Classifier Model : Train for each in classes: #Balancing dataset target_y = [1 if each in x else 0 for x in train_target] [target_y_balanced, train_balanced] = load.split_equally(target_y, train_samples) print 'Training to tag %s from %d samples' % (each, len(target_y_balanced)) Y = np.array(target_y_balanced) if not LOGISTIC_REGRESSION: X = bow.get_incremental_features(train_balanced)
def generateTrainingData(self, err, localCursor, query, sorted_resource_jobs, sorted_tool_jobs):
    """Build feature vectors for one error type and dump them as pickles.

    Executes ``query`` with ``err`` as its parameter and turns every
    returned job row into a feature vector.  The vectors (X), the
    FINISHED/not-FINISHED labels (Y) and the entry dates (D) are written
    to the directories ``<err>X``, ``<err>Y`` and ``<err>D`` below
    ``self.datadir`` in chunks of at most 50000 rows per file, but only
    when more than two samples exist.  Existing directories of those names
    are removed first.

    Args:
        err: error identifier; query parameter and part of the output
            directory names.
        localCursor: database cursor used to run ``query``.
        query: SQL text with one ``%s`` placeholder.
        sorted_resource_jobs: passed through to ``self._lastJobResult``.
        sorted_tool_jobs: passed through to ``self._lastJobResult``.
    """
    print(query % err)
    # parameters are passed as a real one-element tuple; `(err)` is just
    # the bare string
    localCursor.execute(query, (err,))
    X, Y, D = [], [], []
    count = 0

    # tool names ordered by their index in self.tools
    tool_names = [
        tool_name
        for tool_name, _ in sorted(self.tools.items(), key=lambda x: x[1])
    ]
    # list() keeps the concatenation valid when keys() returns a view
    feat_names = ((['resource'] * len(self.resources)) + tool_names +
                  list(self.tests.keys()) +
                  ['submitted_since'] * (self.limit + 1) +
                  (['jobs_since'] * (self.limit + 1)) + ['last_tool_job'] +
                  (['last_job'] * self.limit))
    print('%s %s' % ('tools ' + str(len(self.tools.keys())),
                     'tests ' + str(len(self.tests.keys()))))

    for (jid, res, tool, date_entered, name) in localCursor:
        count += 1

        # one-hot encodings of the resource and the tool
        res_feat = [0] * len(self.resources)
        res_feat[self.resource_no[res]] = 1
        test_feat = self._testFeatures(date_entered)
        tool_feat = [0] * (len(self.tools.keys()))
        tool_feat[self.tools[tool]] = 1

        lastJobs, lastToolJob, jobs_since_feat, submitted_since = \
            self._lastJobResult(sorted_resource_jobs, res, sorted_tool_jobs,
                                self.tools[tool], date_entered)
        feat = feature(jid, res_feat, tool_feat, test_feat,
                       jobs_since_feat, lastJobs,
                       test_time=submitted_since,
                       otherFeatures=lastToolJob)

        X.append(feat.toList())
        Y.append(int(name == 'FINISHED'))
        D.append(date_entered)
        if count % 1000 == 0:
            sys.stdout.write("Created %d feature vectors.\n" % count)
    sys.stdout.write("Created %d feature vectors.\n" % count)

    XPath = os.path.join(self.datadir, err + 'X')
    YPath = os.path.join(self.datadir, err + 'Y')
    DPath = os.path.join(self.datadir, err + 'D')
    for stale in (XPath, YPath, DPath):
        if os.path.isdir(stale):
            shutil.rmtree(stale)

    if len(Y) > 2:
        print("Dumping features of type %s" % err)
        # create fresh folders, then write the data in 50000-row chunks
        os.mkdir(XPath)
        os.mkdir(YPath)
        os.mkdir(DPath)
        beg = 0
        count = 1
        while len(Y) > beg:
            end = min(50000 * count, len(Y))
            chunk_name = str(count) + '.pkl'

            # `with` closes every chunk file; the handles were previously
            # never closed
            x_file = os.path.join(XPath, chunk_name)
            print(x_file)
            with open(x_file, 'wb') as out:
                pickle.dump(X[beg:end], out)

            with open(os.path.join(YPath, chunk_name), 'wb') as out:
                pickle.dump(Y[beg:end], out)

            d_file = os.path.join(DPath, chunk_name)
            print(d_file)
            with open(d_file, 'wb') as out:
                pickle.dump(D[beg:end], out)

            beg = end
            count += 1

        # NOTE(review): confirm the feature names are meant to be written
        # only when samples were dumped.
        with open(os.path.join(self.datadir, 'feat_names.pkl'), 'wb') as out:
            pickle.dump(feat_names, out)
# Make sure we are running inside a virtual environment.
# `sys.real_prefix` is only set by the `virtualenv` tool; environments
# created with `python -m venv` are recognised by `sys.base_prefix`
# differing from `sys.prefix`.
in_virtual_env = (hasattr(sys, 'real_prefix') or
                  getattr(sys, 'base_prefix', sys.prefix) != sys.prefix)
if not in_virtual_env and detector.GLOBAL_WINDOWS == False:
    print("No virtualenv set. Please activate a virtualenv before running.")
    # non-zero status so callers can tell that the run was refused
    sys.exit(1)

if __name__ == '__main__':
    loader = dataloader(label_file, image_directory, num_images)
    if reprocess:
        # recompute the features from the raw images
        data = loader.get_data()
        image_paths, labels = zip(*data)
        process = preprocess(image_paths)
        processed = process.process_images()
        feature_extractor = feature.feature()
        features = feature_extractor.extract_features(processed)
    else:
        # reuse the feature vectors stored in vecs.csv
        rows = loader.load_features('vecs.csv')
        print(np.array(rows).shape)
        labels, features = zip(*rows)
        print(features)

    # NOTE: this rebinds the name `model` from the module to the instance.
    model = model.model_w_cv(features, labels, n_fold_cv)
    model.process(12000, 1.5)
#coding=utf-8
import cv2
import os
import feature
import sys
import numpy as np

# Classify every image below ./cut/<label> (label = first command-line
# argument) and print the fraction that was assigned to <label>.
t = [0, 0, 0]  # number of images assigned to each of the classes 0..2
count = 0
for root, dirs, files in os.walk('./cut/%s' % sys.argv[1]):
    for f in files:
        # join with the walked directory so files in nested folders
        # resolve to their real location
        im = feature.feature(os.path.join(root, f))
        res = im.get_patch()[0][0]
        print(res)
        t[int(res)] += 1
        count += 1

if count:
    print(t[int(sys.argv[1])] / float(count))
else:
    # previously a ZeroDivisionError when the directory held no files
    print('no images found under ./cut/%s' % sys.argv[1])

'''
im = cv2.imread('./cut/2/%s'%f)
print im.shape[0]/100, im.shape[1]/100, im.shape
for i in range(im.shape[0]/100):
    for j in range(im.shape[1]/100):
        image = im[i*100: (i+1)*100, j*100: (j+1)*100]
        # print type(im[i*100: (i+1)*100, j*100: (j+1)*100]), im[i*100: (i+1)*100, j*100: (j+1)*100].shape
        cv2.imwrite('./cut/3/%s.jpg'%count, image)
        count += 1
'''
classes = set(classes); print 'Total number of classes for this model ', len(classes) class_example_count = [] for each in classes: Y =[1 if each in x else 0 for x in train_target ]; class_example_count.append(sum(Y)); #print class_example_count #print len(train_target) #assert(sum(class_example_count) == len(train_target)) print 'examples seen for each class during training ' ,class_example_count classes=['python'] #Feature Model if not LOGISTIC_REGRESSION: bow = feature.feature("bow",train_samples); else: bow_trimmed = feature.feature("bow_bigram",train_samples); metric = []; #Classifier Model : Train for each in classes: #Balancing dataset target_y = [1 if each in x else 0 for x in train_target ]; [target_y_balanced, train_balanced]=load.split_equally(target_y,train_samples) print 'Training to tag %s from %d samples' %(each ,len(target_y_balanced)) Y =np.array(target_y_balanced); if not LOGISTIC_REGRESSION: X = bow.get_incremental_features(train_balanced); else:
#coding=utf-8
import os

import feature

# Classify every file below ./old and print "<file name> <classification>".
for root, dirs, files in os.walk('./old'):
    for f in files:
        # os.walk descends into sub-directories, so the path has to be built
        # from `root`; './old/<name>' is wrong for nested files.
        im = feature.feature(os.path.join(root, f))
        print('%s %s' % (f, im.classify()))
def buildJobFeaturesWithXsede(self,fname,startDate,endDate):
    """Build feature vectors from ``cipres_xsede`` per error type and pickle them.

    For every entry of ``self.errors`` the table is queried for rows whose
    ``value`` is LIKE that entry and whose ``date_entered`` lies strictly
    between ``startDate`` and ``endDate``. Each row yields a feature list (X),
    a label (Y, 1 when ``name == 'FINISHED'``, else 0) and its start date (D).
    When more than two rows were collected, X, Y and D are written in chunks
    of 50000 rows to ``<datadir><err>X/<n>.pkl`` (likewise ``Y`` and ``D``);
    existing directories of those names are removed first. The feature names
    are written to ``<datadir>feat_names.pkl``.

    Args:
        fname: Not used by this method; kept so existing callers still work.
        startDate: Lower bound, spliced verbatim into the SQL text.
        endDate: Upper bound, spliced verbatim into the SQL text.

    Returns:
        None.
    """

    def dump_pickle(obj, path):
        # Close the file deterministically; the handles used to be leaked.
        with open(path, 'wb') as handle:
            pickle.dump(obj, handle)

    localCursor = self.localConn.cursor()
    try:
        #use the given database
        localCursor.execute("USE " + self.localdb)

        #get sorted jobs from resources
        sorted_resource_jobs = {}
        for r in self.resource_tables:
            sorted_resource_jobs[r] = self._sortResourceJobs(r, localCursor)

        #query event_stats table
        fields = ("resource,tool_id,date_entered, date_terminated,wallduration,"
                  "nodecount,processors,queue,name")
        # NOTE(review): startDate/endDate are concatenated into the statement,
        # so they must come from a trusted caller. They are not bound as
        # parameters here because callers may pass pre-quoted SQL fragments.
        where = "where date_entered>"+startDate+" AND date_entered<" + endDate + " AND (value like %s)"
        query = "SELECT " + fields + " FROM cipres_xsede " + where

        feat_names = None
        for err in self.errors:
            print(query % err)
            # DB-API parameters must be a sequence; `(err)` was a bare string.
            localCursor.execute(query, (err,))
            X, Y, D = [], [], []

            #generate features
            count = 0
            for (res, tool, date_start, date_term, wd, nc, procs, queue, name) in localCursor:
                count += 1
                # One-hot encoding of the resource.
                feat_resources = [0] * len(self.resources)
                feat_resources[self.resource_no[res]] = 1
                feat_single = self._singleJobFeature(res, tool, date_start, date_term, wd, nc, procs, queue, name)
                feat_other = [wd, nc, procs, int(queue == 'shared')]
                test_feat = self._testFeatures(date_start)
                # One-hot encoding of the tool.
                feat_tools = [0] * len(self.tools)
                feat_tools[self.tools[tool]] = 1
                lastJob = self._lastJobResult(sorted_resource_jobs, res, date_start)
                feat = feature(feat_resources, feat_tools, test_feat, lastJob, feat_single, feat_other)
                if count == 1:
                    feat.printFeat()
                X.append(feat.toList())
                Y.append(int(name == 'FINISHED'))
                D.append(date_start)
                if count % 10000 == 0:
                    sys.stdout.write("Created %d feature vectors.\n" % count)
            sys.stdout.write("Created %d feature vectors.\n" % count)

            # feat_single only exists once a row has been seen. Computing the
            # names unconditionally raised NameError when the first query was
            # empty, and silently reused a stale value for later empty ones.
            if count:
                feat_names = (
                    (['resource'] * len(self.resources))
                    + list(self.tools.keys())
                    + list(self.tests.keys())
                    + ['last_job']
                    + (['Single_job_feature'] * len(feat_single))
                    + ['Wallduration'] + ['node_count'] + ['processors'] + ['queue']
                )

            if len(Y) > 2:
                print("Dumping features of type %s" % err)
                #create a new folder
                out_dirs = {}
                for suffix in ('X', 'Y', 'D'):
                    path = self.datadir + err + suffix
                    if os.path.isdir(path):
                        shutil.rmtree(path)
                    os.mkdir(path)
                    out_dirs[suffix] = path

                #define a beginning point and put overflow in files
                beg = 0
                chunk = 1
                while len(Y) > beg:
                    end = min(50000 * chunk, len(Y))
                    for suffix, rows in (('X', X), ('Y', Y), ('D', D)):
                        dump_pickle(rows[beg:end], out_dirs[suffix] + '/' + str(chunk) + '.pkl')
                    beg = end
                    chunk += 1

        # NOTE(review): the original layout was lost; the names are assumed to
        # be written once after all error types were processed - confirm.
        if feat_names is not None:
            dump_pickle(feat_names, self.datadir + 'feat_names.pkl')
    finally:
        #close the connection
        localCursor.close()
#coding=utf-8 import os, feature, sys, logging import numpy as np logging.basicConfig(level=logging.DEBUG, format='%(asctime)s %(filename)s[line:%(lineno)d] %(levelname)s %(message)s', datefmt='%a, %d %b %Y %H:%M:%S', filename='r.log', filemode='a') save = np.array([]) for root, dirs, files in os.walk('./images'): for d in dirs: if d != sys.argv[1]: continue for rrroot, dddirs, fffiles in os.walk('./images/%s'%d): for f in fffiles: im = feature.feature('./images/%s/%s' %(d,f)) res = im.snow_region() save = np.r_[save,res] logging.info('%s %s res:%s' %(d,f,res)) print res break np.save('array%s.npy'%sys.argv[1], save)
#coding=utf-8
from feature import feature

if __name__ == '__main__':
    # Demo call: a headline string followed by two names that occur in it.
    features = feature(
        '黄义达与朱孝天前女友佐藤麻衣擦出爱火花(图)',
        '朱孝天',
        '佐藤麻衣',
    )
    print(features)
def function(file):
    """Call ``feature.feature`` on *file* with ``1`` as the second argument.

    The value produced by that call is discarded; this function returns None.
    """
    # NOTE(review): the meaning of the literal 1 is not visible here (seg()
    # passes 0 in the same position) - confirm against feature.feature.
    feature.feature(file, 1)