def install(self):
    """Download, unpack and install this package, then record it as installed.

    Nothing is installed when the package database already reports the
    status ``"installed"``; an error is printed instead.  Otherwise the
    source is downloaded, extracted, dependencies are installed and every
    comma-separated command in ``self.options['install']`` is run through
    the shell in order.  The package is marked installed only if its
    directory exists under ``HPAK_ROOT`` afterwards.
    """
    if hpakDB(self.pkg_name).get_value("status") == "installed":
        misc.print_error("%s - already installed!" % (self.pkg_name), False)
        return

    self.prepare_install()

    # Fetch the package source.
    download(self.options['source'], self.pkg_path, self.pkg_name).get()

    # Unpack it.
    Extractor(self.options).extract()

    # Dependencies go in before the package's own install commands.
    self.install_dep()

    for command in self.options['install'].split(','):
        # Each command is waited on before the next one starts; its exit
        # status is not inspected.
        subprocess.Popen(command, shell=True).wait()

    # The presence of the package directory is the success criterion.
    installed_path = "%s/%s" % (HPAK_ROOT, self.options['dir'])
    if not os.path.exists(installed_path):
        misc.print_error(
            "%s-%s NOT installed, please try again."
            % (self.pkg_name, self.options['version']),
            True)
        return

    hpakDB(self.pkg_name).set_value("status", "installed")
    misc.print_success("%s installed." % (self.pkg_name))
def install(self):
    """Install the package described by ``self.options``.

    Steps, in order: skip if the database status is ``"installed"``,
    prepare, download the source, extract it, install dependencies, run the
    comma-separated install commands through the shell, and finally mark
    the package installed when its directory is present under
    ``HPAK_ROOT``.  Always returns ``None``.
    """
    status_db = hpakDB(self.pkg_name)
    if status_db.get_value("status") == "installed":
        misc.print_error("%s - already installed!" % (self.pkg_name), False)
        return

    self.prepare_install()

    fetcher = download(self.options['source'], self.pkg_path, self.pkg_name)
    fetcher.get()

    unpacker = Extractor(self.options)
    unpacker.extract()

    self.install_dep()

    # Commands run sequentially; their exit codes are ignored.
    for shell_command in self.options['install'].split(','):
        process = subprocess.Popen(shell_command, shell=True)
        process.wait()

    # Verify the installation by looking for the package directory.
    target = "%s/%s" % (HPAK_ROOT, self.options['dir'])
    if os.path.exists(target):
        status_db = hpakDB(self.pkg_name)
        status_db.set_value("status", "installed")
        misc.print_success("%s installed." % (self.pkg_name))
    else:
        failure = "%s-%s NOT installed, please try again." % (
            self.pkg_name, self.options['version'])
        misc.print_error(failure, True)
def __init__(self, filename):
    """Build ``self.courses`` from the comma-separated file *filename*.

    ``Extractor.extract(filename)`` is invoked first.  The file is then
    read completely; every line is split on commas, each field has any
    leading/trailing ``(``, ``)`` and ``"`` characters stripped, and the
    first seven fields are passed positionally to ``Course``.
    """
    super(Database, self).__init__()
    Extractor.extract(filename)

    with open(filename, 'r') as handle:
        rows = handle.readlines()

    self.courses = []
    for row in rows:
        fields = [field.strip('()"') for field in row.split(',')]
        course = Course(fields[0], fields[1], fields[2], fields[3],
                        fields[4], fields[5], fields[6])
        self.courses.append(course)
def start(input_rom: str = None, output_folder: str = "./out") -> None:
    '''
    Extracts data from the input rom.

    input_rom: str
        The path of the rom to extract data from.
    output_folder: str
        The path where the data extracted from the ROM will be saved.
    '''
    # A fresh NDS instance is handed to the extractor together with both paths.
    Extractor(
        input_rom=input_rom,
        output_folder=output_folder,
        nds=NDS(),
    ).extract()
def extract_features(seq_length=40, class_limit=2, image_shape=(299, 299, 3)):
    """Extract and save one feature sequence per video in the dataset.

    For every entry of ``DataSet(...).data`` the frames are fetched,
    rescaled to ``seq_length`` items, run through ``Extractor.extract`` and
    saved with ``np.save`` under ``/content/Geriatrics_Data/Video/sequences``.
    Videos whose ``.npy`` file already exists are skipped.
    """
    data = DataSet(seq_length=seq_length, class_limit=class_limit,
                   image_shape=image_shape)
    model = Extractor(image_shape=image_shape)

    sequences_dir = os.path.join('/content', 'Geriatrics_Data', 'Video',
                                 'sequences')

    pbar = tqdm(total=len(data.data))
    for video in data.data:
        # np.save appends ".npy" to this path on its own.
        path = os.path.join(
            sequences_dir, video[2] + '-' + str(seq_length) + '-features')

        if not os.path.isfile(path + '.npy'):
            all_frames = data.get_frames_for_sample(video)
            frames = data.rescale_list(all_frames, seq_length)
            np.save(path, [model.extract(image) for image in frames])

        pbar.update(1)
    pbar.close()
def test_cond(self):
    """An extractor with the ``mask`` condition yields a non-empty result.

    The assertion inspects the result stored under the first key of
    ``self.data``.
    """
    from masks import mask

    e = Extractor()
    logging.debug(e)
    e.add_feature_condition(mask)
    res = e.extract(self.data)

    # list(...) keeps this working on Python 3, where keys() returns a view
    # that cannot be indexed.
    first_key = list(self.data.keys())[0]
    self.assertTrue(len(res[first_key]) > 0)
def extract_one_feature(video, frame_path, sequence_path, seq_length=400,
                        feature_length=2048):
    """Extract and save the feature sequence of a single video.

    Frames are the files matching ``<frame_path>/<video>_*.jpg``.  Nothing
    is written when no frame exists or when the target
    ``<sequence_path>/<video>_<seq_length>.npy`` is already present.
    Entries of the rescaled frame list equal to ``-1`` are replaced by a
    ``(feature_length, 1)`` array of zeros.
    """
    model = Extractor()

    img_list = glob.glob(os.path.join(frame_path, video + '_*.jpg'))
    if not img_list:
        return

    seqfile = os.path.join(sequence_path,
                           video + '_' + str(seq_length) + '.npy')
    if os.path.isfile(seqfile):
        # Already extracted earlier.
        return

    frames = rescale_list(sorted(img_list), seq_length)

    sequence = [
        model.extract(image) if image != -1
        else np.zeros((feature_length, 1))
        for image in frames
    ]

    np.save(seqfile, sequence)
def extract(data, seq_length, video_name):
    """Return the feature sequence of the video named *video_name*.

    The sample is the first row of ``data.data`` whose third element equals
    *video_name*.  Its frames are rescaled to *seq_length* items, each one
    is passed to ``Extractor.extract`` and the results are returned as a
    NumPy array.

    Raises:
        ValueError: if no row matches *video_name*.
    """
    model = Extractor()

    sample = next((row for row in data.data if row[2] == video_name), None)
    if sample is None:
        raise ValueError("Couldn't find sample: %s" % video_name)

    frames = data.rescale_list(data.get_frames_for_sample(sample), seq_length)

    return np.asarray([model.extract(image) for image in frames])
def extract_features(params, input_):
    """Run an ``Extractor`` configured from *params* on *input_*.

    Every value in *params* is read as ``params[key][0]``.  For the
    optional settings the literal string ``'None'`` is translated to
    ``None``.

    Returns:
        A tuple ``(output, layer)`` with the extractor output and the layer
        setting that was used.

    Raises:
        Exception: ``"Input not found"`` when *input_* does not exist.
    """
    if not os.path.exists(input_):
        raise Exception("Input not found")

    def _optional(key):
        # 'None' (as text) means the option was not supplied.
        value = params[key][0]
        return None if value == 'None' else value

    model_def = params['model_file'][0]
    pretrained_model = params['pretrained_model'][0]
    image_dims = _optional('img_dims')
    mean = _optional('mean')
    input_scale = _optional('input_scale')
    raw_scale = _optional('raw_scale')
    channel_swap = _optional('channel_swap')
    layer = _optional('layer')

    extractor = Extractor(
        model_def,
        pretrained_model,
        image_dims=image_dims,
        mean=mean,
        input_scale=input_scale,
        raw_scale=raw_scale,
        channel_swap=channel_swap,
        layer=layer,
    )
    return extractor.extract(input_), layer
def extract_features():
    """Extract features for the frames of ``VIDEO_PATH`` into a text file.

    The output name is the video's file name with its three-character
    extension replaced by ``-features.txt``; it is written relative to the
    current working directory.  When that file already exists nothing is
    recomputed.  Otherwise the frames are produced with ``get_frames``,
    collected from ``<video path without extension>*.jpg``, reduced to at
    most ``SEQ_LENGTH`` evenly spaced frames and passed one by one to
    ``Extractor.extract``.
    """
    model = Extractor()

    vid_name = VIDEO_PATH.split('/')[-1]
    seq_path = re.sub(r'\.\w{3}', '-features.txt', vid_name)

    if os.path.isfile(seq_path):
        print("Feature vector text file already exists for %s" % VIDEO_PATH)
        return

    get_frames(VIDEO_PATH)
    vid_frame_fmt = re.sub(r'\.\w{3}', '*.jpg', VIDEO_PATH)
    # glob returns files in arbitrary order; sort so the sequence follows
    # the frame file names.
    # NOTE(review): assumes frame names sort chronologically (e.g. zero-padded
    # indices) - confirm against get_frames.
    frames = sorted(glob.glob(vid_frame_fmt))

    if len(frames) > SEQ_LENGTH:
        # Downsample to SEQ_LENGTH frames.  Previously the stride was set to
        # len(frames) and the downsampled list was never used, so every frame
        # was processed.
        skip = len(frames) // SEQ_LENGTH
        frames = frames[::skip][:SEQ_LENGTH]

    sequence = [model.extract(frame) for frame in frames]

    np.savetxt(seq_path, sequence)
    print("Sequence file saved to %s" % seq_path)
def extract(inDir, seqName, dataDir, seqLength):
    """Save one feature sequence per directory listed in the dataset.

    For every directory in ``DataSet(...).dataLowest`` the files matching
    ``*png`` are sorted, passed to ``Extractor.extract`` and the resulting
    list is stored with ``np.save`` as ``<directory>/<seqName>`` (NumPy
    appends ``.npy``).
    """
    data = DataSet(seqName, seqLength, inDir, dataDir)
    model = Extractor(seqName)

    for thisDir in data.dataLowest:
        npypath = os.path.join(thisDir, seqName)
        frames = sorted(glob.glob(os.path.join(thisDir, '*png')))

        sequence = [model.extract(image) for image in frames]

        np.save(npypath, sequence)


"""Main Thread"""
def frames_to_features(frames):
    """ Extract InceptionV3 features from all images in path
    return list with extracted features

    At most ``frames_nb`` frames are used; any surplus at the end of
    *frames* is dropped after a notice is printed.
    """
    available = len(frames)
    if available > frames_nb:
        print("Only using first %d of %d frames" % (frames_nb, available))
        frames = frames[:frames_nb]

    print("Load Inception v3 network ...")
    cnn = Extractor()

    extracted = []
    timer_start()
    for image in frames:
        print("Extracting features from", image)
        extracted.append(cnn.extract(image))
    timer_stop()

    print("Extracted features from %d frames" % len(frames))
    return extracted
class IndexTrainer(object):
    """Fill an ``InvertedIndex`` with descriptors extracted from images."""

    def __init__(self):
        """Create the index, the bag-of-words model and a 'surf' extractor.

        The index's ``author`` and ``description`` are printed.
        """
        self.index = InvertedIndex()
        self.bow = Bow()
        self.extractor = Extractor('surf')
        print(self.index.author)
        print(self.index.description)

    def load_feature(self, path='../models/feature.npy'):
        """Load features from *path*, keeping at most the first 200000 rows.

        The array is stored on ``self.features`` and also returned.
        """
        self.features = np.load(path)
        if len(self.features) > 200000:
            self.features = self.features[:200000]
        # Single formatted argument: prints the same text on Python 2 and 3.
        print("feature shape:  %s" % (self.features.shape,))
        return self.features

    def run(self, path):
        """Index every image found under *path*.

        The bag-of-words model is loaded, the index is reset with its
        centers, and each image's descriptors are appended to the index.
        Elapsed time is reported after every 1000 images.
        """
        self.bow.load()
        self.index.reset(self.bow.centers)
        images = imutil.get_list_image(path)

        t = imutil.Timer(1)
        t.tic()
        for i, image in enumerate(images):
            descriptors = self.extractor.extract(image)
            self.index.append(image, descriptors)
            if (i + 1) % 1000 == 0:
                t.toc('finish 1000 images: ')
                t.tic()
def get_data(sample_num):
    """Extract features for random images drawn from every class folder.

    From each directory under ``data/place365``, *sample_num* ``.jpg``
    files are drawn at random (with replacement).  The chosen paths are
    sorted and each is passed to ``Extractor.extract``; the label of an
    image is the index of its parent folder name in the sorted list of
    class folder names.

    Returns:
        ``(features, labels, sample_count, feature_length)`` where the
        first two are NumPy arrays and ``feature_length`` is
        ``len(features[0])``.

    Raises:
        ValueError: from ``random.randint`` when a class folder holds no
            ``.jpg`` file.
        IndexError: when no image was sampled at all.
    """
    class_folders = glob.glob(os.path.join('data', 'place365', '*'))

    random_images = []
    for class_item in class_folders:
        images = glob.glob(os.path.join(class_item, '*.jpg'))
        for _ in range(sample_num):
            random_images.append(images[random.randint(0, len(images) - 1)])
    random_images = sorted(random_images)

    # Class names are the folder names; reuse the listing made above.
    classes = sorted(item.split(os.path.sep)[-1] for item in class_folders)

    model = Extractor()

    sequence = []
    labels = []
    pbar = tqdm(total=len(random_images))
    for image in random_images:
        sequence.append(model.extract(image))
        labels.append(classes.index(image.split(os.path.sep)[-2]))
        pbar.update(1)
    # Release the progress bar once all images are processed.
    pbar.close()

    return np.array(sequence), np.array(labels), len(sequence), len(
        sequence[0])
def run(args):
    """Convert every collected file under ``args["src"]`` and export a table.

    Each file is converted to XML in ``args["dst"]``, read back with
    ``Reader``, fed to the ``Extractor`` and the resulting data frame is
    written to ``<dst>/<name>.<args["output"]>`` (``csv`` or ``xlsx``; any
    other value writes nothing).  A failure on one file is logged and the
    remaining files are still processed.

    Raises:
        Exception: when the source directory does not exist.
    """
    import logging

    src = os.path.abspath(args["src"])
    if not os.path.exists(src):
        raise Exception("Source directory (%s) does not exist." % (src))

    dst = os.path.abspath(args["dst"])
    os.makedirs(dst, exist_ok=True)

    model = Extractor()
    model.set_defs("./definitions/defs.json")

    for f in collect_files(src):
        # Drops the last four characters (a three-letter extension and dot).
        name = f[0:-4]
        src_file = os.path.join(src, f)
        xml_file = os.path.join(dst, name + ".xml")
        try:
            status = pdfToXML.convert_to_xml(["-o", xml_file, src_file])
            if not status:
                continue

            blocks, kv, texts = Reader({"src": xml_file}).get_content()
            model.set_content({"texts": texts, "blocks": blocks, "kv": kv})
            df = model.extract()

            output_file = os.path.join(dst, name + ".%s" % args["output"])
            if args["output"] == "csv":
                df.to_csv(output_file)
            elif args["output"] == "xlsx":
                # Write to the output file; the workbook used to be written
                # over the source document.
                df.to_excel(output_file)
        except Exception:
            # Best effort per file, but leave a trace instead of hiding it.
            logging.getLogger(__name__).exception(
                "Failed to process %s", src_file)
def extract_and_conv(data, seq_length, video_name):
    """Return frames, features and conv outputs for the video *video_name*.

    The sample is the first row of ``data.data`` whose third element equals
    *video_name*.  Its frames are rescaled to *seq_length* items; for each
    frame both ``Extractor.extract`` and ``Extractor.get_convout`` are
    called, in that order.

    Returns:
        ``(frames, sequence, conv_sequence)`` - the rescaled frame list and
        two lists parallel to it.

    Raises:
        ValueError: if no row matches *video_name*.
    """
    model = Extractor()

    sample = next((row for row in data.data if row[2] == video_name), None)
    if sample is None:
        raise ValueError("Couldn't find sample: %s" % video_name)

    frames = data.rescale_list(data.get_frames_for_sample(sample), seq_length)

    sequence = []
    conv_sequence = []
    for image in frames:
        sequence.append(model.extract(image))
        # Output of the last conv layer for the same frame.
        conv_sequence.append(model.get_convout(image))

    return frames, sequence, conv_sequence
def Classify(self, request_iterator, context):
    """Classify a stream of frames, yielding one label per 40 frames.

    Every incoming chunk is unpickled and passed to ``Extractor.extract``.
    Once ``seq_length`` feature vectors are collected they are fed to the
    loaded model, the ranked predictions are printed and a
    ``humanaction_pb2.label`` is yielded carrying up to ten
    ``messageN``/``classN`` pairs (score / class name).  Pairs for which no
    prediction is available are left unset instead of raising
    ``IndexError``.
    """
    saved_model = 'data/checkpoints/lstm-features.037-0.131.h5'
    seq_length = 40
    class_limit = 10  # Number of classes to extract. Can be 1-101 or None for all.
    top_k = 10  # the label message is filled through message1..message10

    data = DataSet(seq_length=seq_length, class_limit=class_limit)
    modelE = Extractor()
    model = load_model(saved_model)

    sequence = []
    for Chunk in request_iterator:
        # NOTE(review): pickle.loads on request content executes arbitrary
        # code if the sender is not trusted - confirm callers are trusted or
        # switch to a safe encoding.
        frame = pickle.loads(Chunk.Content)
        sequence.append(modelE.extract(frame))
        if len(sequence) < seq_length:
            continue

        print(np.shape(sequence))
        prediction = model.predict(np.expand_dims(sequence, axis=0))
        print(prediction)

        sorted_lps = data.print_class_from_prediction(
            np.squeeze(prediction, axis=0))

        fields = {}
        if sorted_lps is not None:
            for i, class_prediction in enumerate(sorted_lps):
                if i > top_k - 1 or class_prediction[1] == 0.0:
                    break
                print("%s: %.2f" % (class_prediction[0], class_prediction[1]))
                fields['message%d' % (i + 1)] = class_prediction[1]
                fields['class%d' % (i + 1)] = class_prediction[0]

        # presumably a protobuf message, whose omitted fields keep their
        # default value - confirm against the .proto definition.
        yield humanaction_pb2.label(**fields)

        # Start collecting the next window.
        sequence = []
def test_monotony(self):
    """Raising and falling monotony conditions yield a non-empty result.

    The assertion inspects the result stored under the first key of
    ``self.data``.
    """
    from masks import absolute_monotony as monotony

    e = Extractor()
    logging.debug(e)
    e.add_feature_condition(monotony.Raising)
    e.add_feature_condition(monotony.Falling)
    res = e.extract(self.data)
    logging.debug("res: \n%s", pprint.pformat(res))

    # list(...) keeps this working on Python 3, where keys() returns a view
    # that cannot be indexed.
    first_key = list(self.data.keys())[0]
    self.assertTrue(len(res[first_key]) > 0)
def extract(self):
    """Extract forum posts from the .json files generated by Scraper.

    For every category in ``self.categories`` the file
    ``flashback<category>.json`` is extracted into
    ``extracted<category>.txt`` and the result is passed, together with
    that text file name, to ``dataset_divider.Divider.divide``.
    """
    extracted = []
    for category in self.categories:
        json_name = 'flashback' + str(category) + '.json'
        text_name = 'extracted' + str(category) + '.txt'
        extracted.append(Extractor.extract(json_name, text_name))
        # Divide using the result that was just produced.
        dataset_divider.Divider.divide(text_name, extracted[-1])
def main():
    '''
    Entry point when executing from commandline.

    Parses ``--max``, ``--C``, ``--gamma`` and ``--kernel``, trains an SVC
    on the training set extracted from ``constants.MNIST_DATASET_PATH`` and
    pickles both the classifier and the label encoder.  The help message is
    printed and nothing is trained when ``--max`` is not a number or the
    kernel is neither ``linear`` nor ``rbf``.
    '''
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--max',
        help='The maximum number of data points to be used while training model. '
             'if empty, the whole training set will be used.')
    parser.add_argument(
        '--C', type=float, default=1.0,
        help='C parameter for SVC algorithm. '
             'if empty, C will be set to 1.0.')
    parser.add_argument(
        '--gamma', type=float, default=.01,
        help='Gamma parameter for SVC algorithm. '
             'if empty, gamma will be set to 0.01.')
    parser.add_argument(
        '--kernel', type=str, default='linear',
        help='Kernel for SVC algorithm. '
             'if empty, linear kernel will be used. Only linear and rbf kernel '
             'are supported at the moment.')
    args = parser.parse_args()

    # The kernel used to be compared against 'kernel' instead of 'linear',
    # which rejected the default value.
    if (args.max and not is_number(args.max)) or \
            args.kernel not in ('linear', 'rbf'):
        print(constants.TRAINER_HELP_MSG)
        return

    try:
        extractor = Extractor()
        # Extract train set from archive file
        train_set, _, _ = extractor.extract(constants.MNIST_DATASET_PATH)

        if args.max:
            limit = int(args.max)
            feature_set = train_set[0][:limit]
            label_set = train_set[1][:limit]
        else:
            feature_set = train_set[0]
            label_set = train_set[1]

        label_encoder = LabelEncoder()
        labels = label_encoder.fit_transform(label_set)

        # Use SVC to train a recognition model
        recognizer = SVC(C=args.C, gamma=args.gamma, kernel=args.kernel,
                         probability=True)
        recognizer.fit(feature_set, labels)

        # Write trained model and label encoder to file
        with open(constants.MODEL_FILE_PATH, 'wb') as f:
            f.write(pickle.dumps(recognizer))
        with open(constants.LABEL_ENCODER_FILE_PATH, 'wb') as f:
            f.write(pickle.dumps(label_encoder))

        print('Training done')
    except MemoryError:
        # The training dataset is quite big, more than 4GB of RAM and python 64 bit is required
        print('An memory error has occurred, please check if you have enough memory '
              'and you are using python 64bit')
    except Exception as e:
        print('An error has occurred')
        print(str(e))
class Extraktor(object):
    """Consume crawled pages announced on SQS and seed the links they contain."""

    def __init__(self):
        """Create the extractor and the SQS, S3 and DynamoDB handles."""
        self.extractor = Extractor()
        self.sqs = boto3.client('sqs')
        self.queue_url = 'https://sqs.ap-southeast-1.amazonaws.com/841662669278/crawler'
        self.s3 = boto3.client('s3')
        self.dynamodb = boto3.resource('dynamodb')
        self.bloom_filter = MyBloomFilter(self.dynamodb.Table('link'))

    def process(self):
        """Poll the queue forever and handle every received message.

        A message body is the key of an object in the ``samuel-html``
        bucket holding an LZO-compressed, UTF-8 encoded JSON document
        ("pack").  The pack's URL is added to the bloom filter.  For packs
        with ``code == 200`` the links are extracted, every link for which
        ``bloom_filter.add`` returns a falsy value is passed to ``seed``,
        and the URL is stored in the ``link`` table.  The message is
        deleted from the queue afterwards in either case.
        """
        while True:
            received = self.sqs.receive_message(
                QueueUrl=self.queue_url,
                MaxNumberOfMessages=10,
                WaitTimeSeconds=1
            )
            if 'Messages' not in received:
                continue

            for msg in received['Messages']:
                key = msg['Body']
                record = self.s3.get_object(Bucket='samuel-html', Key=key)
                # record['Body'] is a streaming body; read it fully first.
                pack = json.loads(
                    lzo.decompress(record['Body'].read()).decode('utf-8'))

                self.bloom_filter.add(pack['url'])

                if pack.get('code') == 200:
                    url = pack['url']
                    extracted = self.extractor.extract(pack)
                    for link in extracted['links']:
                        if not self.bloom_filter.add(link['url']):
                            seed(link)

                    # save pack to tbl_link
                    self.dynamodb.Table('link').put_item(
                        Item={
                            'url': url,
                            'ctime': Decimal(str(time.time())),
                            'utime': Decimal(str(time.time()))
                        }
                    )
                    logger.info("%s ok" % (pack['url']))
                else:
                    # %s rather than %d: the code may be absent (None).
                    logger.warning("%s not ok code:%s"
                                   % (pack['url'], pack.get('code')))

                self.sqs.delete_message(
                    QueueUrl=self.queue_url,
                    ReceiptHandle=msg['ReceiptHandle']
                )
def extract_full_features(weights, seq_length = 40):
    """Extract features for all frames of every video and save them.

    The dataset is built with no class limit and ``check_dir='data/check'``;
    the extractor is created from *weights*.  Frames are not downsampled.
    Each sequence is stored under ``data/sequences_test`` as
    ``<video[2]>-<seq_length>-features.npy``; videos whose file already
    exists are skipped.
    """
    # None means: use every class.
    data = DataSet(seq_length=seq_length, class_limit=None,
                   check_dir='data/check')
    model = Extractor(weights)

    print(data.data)
    output_dir = os.path.join('data', 'sequences_test')

    pbar = tqdm(total=len(data.data))
    for video in data.data:
        # np.save appends ".npy" to this path on its own.
        path = os.path.join(
            'data', 'sequences_test',
            video[2] + '-' + str(seq_length) + '-features')

        if not os.path.isfile(path + '.npy'):
            frames = data.get_frames_for_sample(video)
            sequence = [model.extract(image) for image in frames]

            # Create the target directory on first use.
            if not os.path.exists(output_dir):
                os.mkdir(output_dir)

            np.save(path, sequence)

        pbar.update(1)
    pbar.close()
class Schema():
    """Feed the Parser with Collator and Extractor output and save the result.

    The parsed items are written as one JSON file each.
    """

    _OPTIONAL_SECTIONS = (('relationships', 'Relationships: '),
                          ('allOf', 'allOf: '))

    def __init__(self, database):
        """Create the collator for *database*, the extractor and the parser."""
        self._collator = Collator(database)
        self._extractor = Extractor()
        self._parser = Parser()

    def generate(self, path):
        """Group, extract, parse and save the schema files under *path*.

        Intermediate groupings are printed after each stage.
        """
        node_groups = self._collator.grouping_nodes()
        print('\n ---- Done Grouping Nodes ----')
        Schema.print_grouping(node_groups)

        relationship_groups = self._collator.grouping_relationships()
        print('\n ---- Done Grouping Relationships ----')
        Schema.print_grouping(relationship_groups)

        extracted_groups = self._extractor.extract(node_groups)
        print('\n ---- Done Extracting ----')
        Schema.print_grouping({**extracted_groups, **relationship_groups})

        parsed = self._parser.parse(extracted_groups, relationship_groups)
        self._save(path, parsed)

    def _save(self, path, parsed_list):
        """Write every item of *parsed_list* to ``<path>/<item['$id']>``.

        *path* is created when missing; files are JSON with 4-space indent.
        """
        if not os.path.exists(path):
            os.makedirs(path)
        target_dir = Path(path)
        for item in parsed_list:
            with open(target_dir / item['$id'], 'w') as out_file:
                json.dump(item, out_file, indent=4)

    @staticmethod
    def print_grouping(grouping):
        """Print key, properties and any optional sections of each group."""
        for key in grouping:
            entry = grouping[key]
            print('\nKey: ' + str(key))
            print('Properties: ' + str(entry['props']))
            for field, label in Schema._OPTIONAL_SECTIONS:
                if field in entry.keys():
                    print(label + str(entry[field]))
def extractor_features(data_file, sequences_dir, seq_length,
                       pretrained_model=None, layer_name=None,
                       size=(150, 150)):
    """Extract a feature sequence per video and save it as a text file.

    *sequences_dir* is created when missing.  For every entry of the
    dataset the frames are rescaled to *seq_length* items, passed to
    ``Extractor.extract`` and written with ``np.savetxt`` as a
    ``(seq_length, -1)`` matrix to
    ``<sequences_dir>/<video[FILE_INDEX]>-<seq_length>-features.txt``.
    Existing files are not regenerated.

    ``class_limit`` and ``FILE_INDEX`` are not defined in this function;
    presumably they are module-level names - TODO confirm.
    """
    if not os.path.exists(sequences_dir):
        os.makedirs(sequences_dir)

    data = DataSet(data_file, sequences_dir, seq_length=seq_length,
                   class_limit=class_limit)
    model = Extractor(pretrained_model, layer_name, size)

    pbar = tqdm(total=len(data.data))
    for video in data.data:
        path = sequences_dir + '/' + video[FILE_INDEX] + '-' + str(
            seq_length) + '-features.txt'

        if not os.path.isfile(path):
            frames = data.rescale_list(
                data.get_frames_for_sample(video), seq_length)
            sequence = [model.extract(image) for image in frames]
            # One row per frame.
            np.savetxt(path, np.array(sequence).reshape((seq_length, -1)))

        pbar.update(1)
    pbar.close()
def extract_features():
    """Extract and save a 30-frame feature sequence for every video.

    The dataset is built without a class limit.  Each sequence is stored
    with ``np.save`` as
    ``<model_path>/data/sequences/<video[1]>-30-features.npy``; videos
    whose file already exists are skipped.

    ``model_path`` is not defined in this function; presumably it is a
    module-level name - TODO confirm.
    """
    seq_length = 30
    # None means: use every class.
    data = DataSet(seq_length=seq_length, class_limit=None)
    model = Extractor()
    print(data.data)

    pbar = tqdm(total=len(data.data))
    for video in data.data:
        # np.save appends ".npy" to this path on its own.
        path = os.path.join(
            model_path, 'data', 'sequences',
            video[1] + '-' + str(seq_length) + '-features')

        if not os.path.isfile(path + '.npy'):
            frames = data.rescale_list(
                data.get_frames_for_sample(video), seq_length)
            np.save(path, [model.extract(image) for image in frames])

        pbar.update(1)
    pbar.close()
def extract_feature(video_path='data/video', frame_path='data/frame',
                    sequence_path='data/sequence', seq_length=400,
                    feature_length=2048):
    """Extract and save a feature sequence for every ``.mp4`` in *video_path*.

    For each video the frames ``<frame_path>/<video>_*.jpg`` are sorted and
    rescaled to *seq_length* entries with ``rescale_list``.  Entries equal
    to ``-1`` become a float32 zero vector of length *feature_length*; all
    others are passed to ``Extractor.extract``.  The result is saved as
    ``<sequence_path>/<video>_<seq_length>.npy``.  Videos without frames or
    with an existing output file are skipped.
    """
    if not os.path.exists(sequence_path):
        os.mkdir(sequence_path)

    video_name = glob.glob(os.path.join(video_path, '*.mp4'))
    video_name_noext = [name.split(os.path.sep)[-1].split('.')[0]
                        for name in video_name]

    pbar = tqdm(total=len(video_name_noext))
    model = Extractor()

    for video in video_name_noext:
        img_list = glob.glob(os.path.join(frame_path, video + '_*.jpg'))
        seqfile = os.path.join(sequence_path,
                               video + '_' + str(seq_length) + '.npy')

        # Skipped videos still count as handled, so the bar can reach its
        # total (videos without frames used to be left out).
        if not img_list or os.path.isfile(seqfile):
            pbar.update(1)
            continue

        frames = rescale_list(sorted(img_list), seq_length)

        sequence = [
            model.extract(image) if image != -1
            # zero padding for the -1 entries
            else np.zeros((feature_length, ), dtype='float32')
            for image in frames
        ]

        np.save(seqfile, sequence)
        pbar.update(1)

    pbar.close()
class Thermometer(object):
    """Collect concert reviews for the performers listed in a CSV file."""

    def __init__(self):
        """Load the performer names and create the helper objects.

        The names are the comma-separated values on the first line of
        ``../lib/performers.csv``.
        """
        # Close the file once it has been read.
        with open('../lib/performers.csv', 'r') as performers_file:
            artists = performers_file.readlines()
        # NOTE(review): the last name keeps the line's trailing newline, if
        # any - confirm whether it should be stripped.
        self.artists = artists[0].split(',')
        self.googler = Googler()
        self.extractor = Extractor()
        self.session = Session()

    def extract_all_reviews(self):
        """Extract reviews for every loaded performer."""
        for a in self.artists:
            self.extract_concert_reviews_for_performer(a)

    def extract_concert_reviews_for_performer(self, performer):
        """Store up to two concert reviews for *performer*.

        Performers that already have two or more reviews are skipped.  Of
        the URLs returned by the googler only the first two are used; a URL
        that is already stored is skipped, the others are downloaded,
        passed to the extractor and committed as a ``Review``.
        """
        existing = db.get_review_by_teamband_name(self.session, performer)
        if existing.count() >= 2:
            print(performer + " was already in the database")
            return

        print("now scraping " + performer)
        urls = self.googler.google_concert_reviews_urls(
            performer + " concert reviews")

        for url in urls[:2]:
            if db.get_review_by_url(self.session, url).count() != 0:
                continue
            response = requests.get(url)
            tree = BeautifulSoup(response.text)
            most_likey_review = self.extractor.extract(tree)
            r = Review(teamband_name=performer, url=url,
                       review=most_likey_review)
            self.session.add(r)
            self.session.commit()
            # Pause between downloads.
            time.sleep(10)
def get_data(data_type):
    """Extract features and labels for every image of a data split.

    Images are the sorted matches of ``data/<data_type>/**/*.jpg``.  The
    label of an image is the index of its parent folder name in the sorted
    list of entries directly under ``data/<data_type>``.

    Returns:
        ``(features, labels, sample_count, feature_length)`` where the
        first two are NumPy arrays and ``feature_length`` is
        ``len(features[0])``.

    Raises:
        IndexError: when no image was found.
    """
    images = sorted(glob.glob(os.path.join('data', data_type, '**', '*.jpg')))

    class_dirs = glob.glob(os.path.join('data', data_type, '*'))
    classes = sorted(item.split(os.path.sep)[-1] for item in class_dirs)

    model = Extractor()

    sequence = []
    labels = []
    pbar = tqdm(total=len(images))
    for image in images:
        sequence.append(model.extract(image))
        labels.append(classes.index(image.split(os.path.sep)[-2]))
        pbar.update(1)
    # Release the progress bar once all images are processed.
    pbar.close()

    return np.array(sequence), np.array(labels), len(sequence), len(
        sequence[0])
def crawl_school_programs(data):
    """Fetch the text of every program that does not have one yet.

    Each program is pretty-printed first.  Programs that already contain a
    ``'text'`` entry are passed through untouched.  For the others
    ``program['url']`` is requested; on HTTP 200 the extracted text is
    stored under ``'text'`` unless it is blank, otherwise ``Error code`` is
    printed.  The program dictionaries are modified in place.

    Returns:
        A list with all programs, in input order.
    """
    programs = []
    for program in data:
        pprint(program)
        # `in` replaces dict.has_key, which exists on Python 2 only.
        if 'text' in program:
            programs.append(program)
            continue

        url = program['url']
        print('requesting url %s ...' % url)
        # NOTE(review): verify=False disables TLS certificate checks.
        r = requests.get(url, verify=False)
        if r.status_code == 200:
            text = Extractor().extract(r.text)
            if len(text.strip()) != 0:
                program['text'] = text
        else:
            print('Error code')
        programs.append(program)
    return programs
def spider(self, root, pages = True, subcategories = True, action = "traverse", preclean = False, depth = 1):
    # Walk the wiki starting at `root` and record what is found as nodes in a
    # neo4j write batch; relationships are counted through self.incr_rel and
    # turned into batch entries at the end.
    #
    #   root          -- name the walk starts from
    #   pages         -- "traverse" only: also process the pages of a category
    #   subcategories -- "traverse" only: also follow sub-categories
    #   action        -- "traverse" (category tree) or "crawl" (follow links)
    #   preclean      -- clear the graph database first
    #   depth         -- "crawl" only: decremented once per visited topic;
    #                    links are enqueued only while it is still > 0
    if preclean:
        self.graphdb.clear()
    seen_key = "URL_SEEN"
    queue_key = "URL_QUEUE"
    ex = Extractor()
    batch = neo4j.WriteBatch(self.graphdb)
    # The work queue and the visited set live in self.fdb under the two keys
    # above; these helpers wrap its set operations.
    queue_empty = lambda: self.fdb.scard(queue_key) == 0
    seen = lambda x: self.fdb.sismember(seen_key, x)
    visit = lambda x: self.fdb.sadd(seen_key, x)
    dequeue = lambda: self.fdb.spop(queue_key)
    enqueue = lambda x: self.fdb.sadd(queue_key, self._encode_str(x))
    if action == "traverse":
        enqueue(root)
        while not queue_empty():
            current = dequeue()
            print current
            # Skip empty names and anything that was handled before.
            if current and current.strip() and not seen(current):
                visit(current)
                result = ex.getAllFromCategory(current)
                self.updateBatch(batch, type = neo4j.Node, node = {'name': current, 'class': self.CATEGORY})
                if pages:
                    for page in result['pages']:
                        print "{0}\tp:{1}".format(current[:15], page)
                        self.incr_rel(page, current, self.CATEGORY_REL)
                        self.updateBatch(batch, type = neo4j.Node, node = {'name': page, 'class': self.ARTICLE})
                        # Every wiki link of the page becomes an article node.
                        links = ex.getWikiLinks(page)
                        for a in links:
                            print "{0}\tp:{1}\t{2}".format(current[:15], page, a)
                            self.incr_rel(a, page, self.SIBLING_REL)
                            self.updateBatch(batch, type = neo4j.Node, node = {'name': a, 'class': self.ARTICLE})
                if subcategories:
                    for subcat in result['categories']:
                        print "{0}\tc:{1}".format(current, subcat)
                        self.incr_rel(subcat, current, self.SUBCAT_REL)
                        self.updateBatch(batch, type = neo4j.Node, node = {'name': subcat, 'class': self.CATEGORY})
                        # Sub-categories are walked later as well.
                        enqueue(subcat)
    elif action == "crawl":
        enqueue(root)
        while not queue_empty():
            topic = dequeue()
            if topic and topic.strip() and not seen(topic):
                visit(topic)
                result = ex.extract(topic)
                depth -= 1
                self.updateBatch(batch, type = neo4j.Node, node = {'name': topic, 'class': result['type']})
                if result['type'] == self.CATEGORY:
                    pass
                elif result['type'] == self.ARTICLE:
                    for a in result['links']:
                        self.incr_rel(a, topic, self.SIBLING_REL)
                        print "adding: ", a
                        self.updateBatch(batch, type = neo4j.Node, node = 
                            {'name': a, 'class': self.ARTICLE})
                        if depth > 0:
                            enqueue(a)
                    for c in result['categories']:
                        # NOTE(review): this passes `a`, the last link of the
                        # loop above (undefined when there were no links),
                        # although the loop variable is `c` - looks like a
                        # copy/paste slip; confirm the intended arguments.
                        self.incr_rel(a, topic, self.CATEGORY_REL)
                        self.updateBatch(batch, type = neo4j.Node, node = {'name': c, 'class': self.CATEGORY})
                elif result['type'] == self.DISAMBIGUATION:
                    for a in result['links']:
                        self.incr_rel(a, topic, self.DISAMB_REL)
                        self.updateBatch(batch, type = neo4j.Node, node = {'name': a, 'class': self.DISAMBIGUATION})
    print "FINISHED WITH THE NODES..."
    # Each member of self.rel_key is split as "<rel>:<node1>:<node2>"; both
    # nodes are looked up by name in the node index.
    for k in self.fdb.smembers(self.rel_key):
        print "REL:", k
        try:
            nodes = k.split(":", 2)
            rel = nodes[0]
            n1 = self.node_index.get('name', nodes[1])[0]
            n2 = self.node_index.get('name', nodes[2])[0]
            self.updateBatch(batch, type = neo4j.Relationship, rel = {'node1': n1, 'rel': rel, 'weight': 1, 'node2': n2})
        except Exception as e:
            # A relationship that cannot be resolved is reported and skipped.
            print "REL EXCEPTION: ", e
    print "DONE>>>>>>>>>>>>>>>"
DXF_DIRECTORY = "./dxf/"
CSV_DIRECTORY = "./csv/"

ext = Extractor()

# For each dxf file in ./dxf/ generate one csv file with the same name
# in ./csv/.
processed = 0
for filename in os.listdir(DXF_DIRECTORY):
    dxf_filepath = os.path.join(DXF_DIRECTORY, filename)

    # Files that cannot be opened are reported and skipped.
    if not ext.open_dxf(dxf_filepath):
        print(f"Could not open {dxf_filepath}, skipping...")
        continue

    try:
        # extract pole coordinates/labels
        ext.extract()

        # The csv name is everything before the first "." of the dxf name.
        csv_filepath = filename.split(".")[0] + ".csv"
        csv_filepath = os.path.join(CSV_DIRECTORY, csv_filepath)
        ext.write_csv(csv_filepath)
        processed += 1
    except Exception:
        # Keep going with the next file, but let Ctrl-C and interpreter exit
        # through (a bare except would swallow them too).
        print(f"Error processing file {dxf_filepath}!")

print(
    f"Complete. Successfully processed {processed} of {len(os.listdir(DXF_DIRECTORY))} files."
)
class Crawler:
    """ Main class for this dummy crawler """

    def __init__(self, dbfile):
        """Remember the JSON database file; nothing is loaded yet."""
        self.dbfile = dbfile
        self.data = None
        self.school_collection = None
        self.extractor = Extractor()

    def load(self):
        """Load ``self.dbfile`` and build the school collection from it.

        Exits with status 1 when data is already held in memory.
        """
        if self.data is not None:
            print('You have unsaved in-memory data, cannot load new data')
            sys.exit(1)
        with open(self.dbfile, 'r') as f:
            self.data = json.load(f)
        self.school_collection = SchoolCollection(self.data['schools'])
        print('Loaded %s json file, got %d schools'
              % (self.dbfile, self.school_collection.get_num_schools()))

    def dump(self):
        """Serialise the school collection back into ``self.dbfile``.

        Exits with status 1 when nothing has been loaded.
        """
        if self.data is None:
            print('Nothing to dump')
            sys.exit(1)
        self.data = self.school_collection.toJSON()
        with open(self.dbfile, 'w') as f:
            json.dump(self.data, f)
        print('Dumped %s json file' % self.dbfile)

    def fetch(self, url):
        """ Entrance for all kinds of HTTP requests

        Returns ``(True, html)`` for an HTTP 200 answer and
        ``(False, None)`` otherwise, including when the request raises an
        ``Exception``.  Interpreter-level exceptions such as
        ``KeyboardInterrupt`` are no longer swallowed.
        """
        try:
            # NOTE(review): verify=False disables TLS certificate checks.
            response = requests.get(url, verify=False)
            if response.status_code == 200:
                return True, response.text
        except Exception:
            # Best effort: any request failure is reported as "not ok".
            pass
        sys.stderr.write('Error fetch\n')
        return False, None

    def fetch_program_text(self, url):
        """
        Just read the content from url, load <p> text only. I think this is the best heuristic method.

        Returns ``(is_ok, text)``; ``text`` is ``None`` when the fetch
        failed.
        """
        is_ok, html = self.fetch(url)
        if html is None:
            # Nothing to extract from; html.strip() would raise here.
            return is_ok, None
        text = self.extractor.extract(html.strip())
        return is_ok, text

    # important public API
    def add_program(self, school_name, data, fetch_text=True, override_program=False):
        """
        Try to add a program to program list
        Currently I dont take care about the return value

        The school must exist already.  An existing program of the same
        name is replaced only when *override_program* is set.  With
        *fetch_text* the program text is downloaded from the program's URL
        and stored when the download succeeded.  Always returns ``None``.
        """
        if self.school_collection.is_school_exists(school_name) == False:
            sys.stderr.write("Should add school '%s' first\n" % school_name)
            return None

        school = self.school_collection.find_school(school_name)
        if school.is_program_exists(data['name']):
            if override_program == False:
                return None

        prog = Program(data)
        if fetch_text:
            is_ok, text = self.fetch_program_text(prog.url)
            if is_ok:
                prog.text = text
        pprint(prog.toJSON())
        school.insert_program(prog)
        return None
def show_webcam(mirror=False):
    """
    Show the webcam stream and classify it in chunks of `seq_length` frames.

    Each displayed frame is resized to 299x299 and passed through the
    Extractor; after `seq_length` frames the feature sequence is fed to the
    loaded model and the class predictions are printed.

    Pressing ESC, or a failed frame read, ends the function; the capture
    device and the preview window are always released.

    mirror: when true, every frame is flipped horizontally before display.
    """
    # initialize the video stream, then allow the camera sensor to warm up
    print("[INFO] starting video stream...")
    # NOTE(review): `saved_model` is not defined inside this function (the
    # assignment below is commented out); it must exist at module level.
    # saved_model = 'data/checkpoints/lstm-features.037-0.131.h5'
    vs = cv2.VideoCapture(-1)
    time.sleep(2)

    # Set defaults.
    seq_length = 40
    class_limit = 10  # Number of classes to extract. Can be 1-101 or None for all.

    data = DataSet(seq_length=seq_length, class_limit=class_limit)

    # get the model.
    modelE = Extractor()
    model = load_model(saved_model)

    try:
        # loop over chunks of frames from the video stream
        while True:
            # NOTE(review): the overlay label is reset for every chunk, so the
            # text drawn below is always empty; kept as in the original.
            first = ""
            v1 = ""
            sequence = []
            for _ in range(seq_length):
                ret_val, frame = vs.read()
                if not ret_val:
                    # Camera stopped delivering frames: quit instead of
                    # predicting on a partial or empty sequence forever.
                    return
                if mirror:
                    frame = cv2.flip(frame, 1)
                width = np.size(frame, 1)
                height = np.size(frame, 0)
                # putText needs integer pixel coordinates; "/" yields floats
                # under Python 3.
                x = width // 2
                y = height // 2
                # Draw before showing, otherwise the overlay is never visible.
                cv2.putText(frame, first + v1, (x, y), cv2.FONT_HERSHEY_PLAIN,
                            1.0, (255, 0, 0), thickness=1)
                cv2.imshow('my webcam', frame)
                frame = cv2.resize(frame, (299, 299), interpolation=cv2.INTER_CUBIC)
                if cv2.waitKey(1) == 27:
                    return  # esc to quit
                features = modelE.extract(frame)
                sequence.append(features)

            # Predict!
            print(np.shape(sequence))
            prediction = model.predict(np.expand_dims(sequence, axis=0))
            print(prediction)
            sorted_lps = data.print_class_from_prediction(np.squeeze(prediction, axis=0))
            for i, class_prediction in enumerate(sorted_lps):
                if i >= 10 or class_prediction[1] == 0.0:
                    break
                print("%s: %.2f" % (class_prediction[0], class_prediction[1]))
                first = class_prediction[0]
                v1 = class_prediction[1]
    finally:
        # Always free the camera and close the preview window.
        vs.release()
        cv2.destroyAllWindows()
# For every directory under `dirrr`: read its labels, then turn each labelled
# video (a folder of frame images) into one .npy array of per-frame features.
directories = os.listdir(dirrr)
dframes = []
for directory in directories:
    print(directory)
    labels = label(directory)
    dframes.append(labels)
    indir = os.path.join(dirrr, directory)

    for video in tqdm(labels['Path']):
        print(video)
        # Drop the last four characters of the labelled path
        # (presumably a 3-letter file extension - TODO confirm).
        stem = video[:-4]
        invideo = os.path.join(indir, stem)
        # Flatten the relative path into a single output file name.
        outvideo = os.path.join(output, stem.replace('/', '_'))

        # Frames are processed in sorted file-name order.
        seq = np.array([
            model.extract(os.path.join(invideo, frame))
            for frame in sorted(os.listdir(invideo))
        ])
        np.save(outvideo + '.npy', seq)

# Merge the label tables of all directories into one CSV.
final_csv_path = maindir + '/Labels' + '/Final.csv'
pd.concat(dframes).to_csv(final_csv_path, index=False)
# Build one row per input record: [image path, binary label, feature values...]
# and collect the rows in new_data_list.
for read in read_data_list:
    # Create the labels for the binary and the multi-class tasks.
    binary_label = int(float(read[1]))
    #if binary_label == 1:
    #    class_num += 1
    img_path = str(read[0])

    # Fix up the directory prefix.
    img_path = img_path.replace('./data', '/media/futami/HDD1/DATASET_KINGDOM/Scene')
    print(img_path)

    # Save the feature vector in numpy format.
    # Select the feature extractor to use.
    feature = model.extract(img_path, model_name)
    # NOTE(review): feature_shape is computed but not used in the visible code.
    feature_shape = str(feature.shape)
    feature = feature.tolist()
    # Prepend the path and the label so each row is self-describing.
    feature.insert(0, img_path)
    feature.insert(1, binary_label)
    #feature.insert(2, class_num)
    new_data_list.append(feature)
    print ('extra feature: ' + img_path)

# save labeling as csv file
# NOTE(review): the writer is created but nothing is written in the visible
# code - the snippet appears to continue beyond this point.
with open(NEW_LABELING_DIR + basename , 'w') as f:
    writer = csv.writer(f)
class ICLoop(QObject):
    """
    Injection/extraction cycle controller built as a small state machine.

    The object owns a LinStarter, an Extractor and a ModesClient; their
    completion signals (runDone, extractionDone, markedReady) are all
    connected to nextState(), which advances through the handlers listed in
    self.states. State names are looked up in the module-level `_states`
    sequence (defined outside this class).
    """

    # Emitted with the current state name / the current IC run mode.
    stateChanged = pyqtSignal(str)
    icmodeChanged = pyqtSignal(str)

    # stage events
    # NOTE(review): none of these stage signals is emitted in the visible code.
    preparing2inject = pyqtSignal()
    injecting = pyqtSignal()
    injected = pyqtSignal()
    preparing2extract = pyqtSignal()
    extracting = pyqtSignal()
    extracted = pyqtSignal()

    def __init__(self):
        """Create the helper objects, set defaults and wire the signals."""
        QObject.__init__(self)

        self.linStarter = LinStarter()
        self.extractor = Extractor()
        self.modeCtl = modes.ModesClient()

        self.particles = 'e'  # 'e', 'p'
        self.stored_particles = None  # None, 'e', 'p'
        self.requested_particles = None  # None (means do not switch), 'e', 'p'
        self.beam_user = None  # None, 'v2', 'v4'
        self.requested_beam_user = None
        self.requested_runmode = None

        self.ic_runmode = 'manual'  # 'manual', 'single', 'round', 'auto'
        self.state = "idle"
        self.state_ind = 0  # index into self.states / _states

        # Number of shots per particle type, passed to newCounterCycle().
        self.shots = {'e': 5, 'p': 50}

        self.kickers_subsys = [22, 18, 19]  # subsystems to switch injection-extraction
        self.ic_subsys = [
            32, 17, 38, 52, 53, 54, 55, 29, 59, 61, 62, 63, 64, 65, 67, 68, 60,
            69, 70, 71, 72, 73, 66, 74, 75, 50, 3, 4, 51, 5, 6, 23, 30, 7, 8, 9,
            10, 11, 33, 37
        ]
        self.k500_subsys = [
            34, 24, 45, 12, 56, 13, 46, 14, 57, 15, 47, 43, 76, 58, 44, 48, 26,
            25, 49, 28, 27
        ]

        # NOTE(review): the timer is created but not used in the visible code.
        self.timer = QTimer()

        # Every "stage finished" signal drives the state machine forward.
        self.modeCtl.markedReady.connect(self.nextState)
        self.linStarter.runDone.connect(self.nextState)
        self.extractor.extractionDone.connect(self.nextState)

        # Handlers indexed by state_ind; order must match _states.
        self.states = [
            self.__idle, self.__preinject, self.__injecting, self.__injected,
            self.__preextract, self.__extracting, self.__extracted
        ]

    # state machine switching conditions implementation
    def nextState(self):
        """Advance to the next state and run its handler."""
        # NOTE(review): the comment below talks about manual operation, but the
        # condition is `!= "manual"`, so as written every non-manual run mode
        # returns here and the "round"/"auto" checks further down cannot be
        # true - confirm the intended condition.
        if self.ic_runmode != "manual":
            # for manual operation - just proc requested stage if possible and stop
            self.stateChanged.emit(self.state)
            return
        # NOTE(review): state_ind is not bounds-checked before indexing.
        self.state_ind += 1
        self.state = _states[self.state_ind]
        self.stateChanged.emit(self.state)
        self.states[self.state_ind]()
        # 'injected' has no asynchronous completion signal, so step on directly.
        if self.state == "injected" and self.ic_runmode in ["round", "auto"]:
            self.nextState()
        # End of a round: wrap around to the first state and continue.
        if self.state == 'extracted' and self.ic_runmode in ["round", "auto"]:
            self.state_ind = 0
            self.state = _states[self.state_ind]
            self.nextState()

    # state functions: what to do when proceeding to state
    def __idle(self):
        pass

    def __preinject(self):
        # check for requests
        # NOTE(review): both request checks are placeholders (no action yet).
        if self.requested_particles:
            pass
        if self.requested_beam_user:
            pass
        self.linStarter.setRunmode(1)
        # Load the injection mode for the current particle type into the kickers.
        self.modeCtl.load_marked(mode_map[self.particles + 'inj'], self.kickers_subsys)

    def __injecting(self):
        self.linStarter.newCounterCycle(self.shots[self.particles])
        # after injection initiation - possible some particles already stored
        self.stored_particles = self.particles

    def __injected(self):
        pass

    def __preextract(self):
        # Load the extraction mode for the current particle type into the kickers.
        self.modeCtl.load_marked(mode_map[self.particles + 'ext'], self.kickers_subsys)

    def __extracting(self):
        self.extractor.extract()

    def __extracted(self):
        # the particles are gone
        self.stored_particles = None

    # commands implementations -------------------------
    # not really correct... we need to initiate end of round,
    # extract beam if needed and then make changes to magnetic systems
    def setUseCase(self, particles, beam_user):
        """
        Request a change of particle type and/or beam user.

        NOTE(review): unfinished - mode_subsys and mag_path are computed but
        the final load_marked call is commented out, and self.particles /
        self.beam_user are not updated here.
        """
        if self.beam_user == beam_user and self.particles == particles:
            # no changes
            return
        mode_subsys = []
        if self.particles == particles and self.beam_user != beam_user:
            # just beam user changed, possibly no changes to IC
            # need to initiate channels remag
            # stop beam if running, don't drop beam
            # NOTE(review): self.runmode is not assigned anywhere in this
            # class (only ic_runmode and linStarter.runmode are visible) -
            # this line looks like it would raise AttributeError; confirm.
            self.requested_runmode = self.runmode
            if self.state != 'idle':
                self.stop()
            mode_subsys = self.k500_subsys
        if self.particles != particles and self.beam_user == beam_user:
            # need to drop beam and change everything in magsys
            # ask to drop a beam
            if self.state != 'idle':
                self.extract()
            mode_subsys = self.ic_subsys + self.k500_subsys
        if self.particles != particles and self.beam_user != beam_user:
            # need to drop beam and change everything in magsys
            # ask to drop a beam
            if self.state != 'idle':
                self.extract()
            mode_subsys = self.ic_subsys + self.k500_subsys
        start_mode = mode_num(self.particles, self.beam_user)
        target_mode = mode_num(particles, beam_user)
        mag_path = {
            name: mode_path_num(name, start_mode, target_mode)
            for name in remag_devs
        }
        #self.modeCtl.load_marked(mode, mode_subsys)

    def setLinRunMode(self, runmode):
        """
        Set the LinStarter run mode; `runmode` may be a name (str, looked up
        in `runmodes`) or a value. Stops the cycle first if not idle.
        """
        if isinstance(runmode, str):
            mode_val = runmodes[runmode]
        else:
            mode_val = runmode
        if self.linStarter.runmode == mode_val:
            # no changes
            return
        if self.state != 'idle':
            # if in any automatic stages - go to idle state
            self.stop()
        # NOTE(review): the original `runmode` (possibly a str) is passed on,
        # not the resolved mode_val - confirm setRunmode accepts both.
        self.linStarter.setRunmode(runmode)

    def setEshots(self, num):
        """Set the number of shots used for 'e' particles."""
        self.shots['e'] = int(num)

    def setPshots(self, num):
        """Set the number of shots used for 'p' particles."""
        self.shots['p'] = int(num)

    # stop any operation
    def stop(self):
        """Return to manual/idle and stop both the counter and the extraction."""
        self.ic_runmode = 'manual'
        self.state = 'idle'
        self.state_ind = 0
        self.icmodeChanged.emit(self.ic_runmode)
        self.linStarter.stopCounter()
        self.extractor.stopExtraction()

    def inject(self):
        """Switch to 'single' run mode, reset to idle and call nextState()."""
        self.ic_runmode = 'single'
        self.state = 'idle'
        self.state_ind = 0
        self.icmodeChanged.emit(self.ic_runmode)
        self.nextState()

    def extract(self):
        """Switch to 'manual', jump to the 'injected' state and call nextState()."""
        self.ic_runmode = 'manual'
        self.state = 'injected'
        self.state_ind = 3  # index of __injected in self.states
        self.icmodeChanged.emit(self.ic_runmode)
        self.nextState()

    def execRound(self):
        """Switch to 'round' run mode, reset to idle and call nextState()."""
        self.ic_runmode = 'round'
        self.state = 'idle'
        self.state_ind = 0
        self.icmodeChanged.emit(self.ic_runmode)
        self.nextState()

    def execBurst(self):
        """Switch to 'auto' run mode, reset to idle and call nextState()."""
        self.ic_runmode = 'auto'
        self.state = 'idle'
        self.state_ind = 0
        self.icmodeChanged.emit(self.ic_runmode)
        self.nextState()
def extract():
    """Create a default-constructed Extractor and run its extract() method."""
    Extractor().extract()
def _build_arg_parser():
    """Return the command-line parser for the face recognizer script."""
    # (flags, keyword options) in the order the options are registered.
    specs = [
        (('--cmd',), dict(
            default='extract', type=str,
            choices=['train', 'test', 'extract'],
            help='train, test or extract')),
        (('--arch_type',), dict(
            type=str, default='senet50_ft', help='model type',
            choices=['resnet50_ft', 'senet50_ft',
                     'resnet50_scratch', 'senet50_scratch'])),
        (('--dataset_dir',), dict(
            type=str, default='/tmp/Datasets/3Dto2D/squared/uniques',
            help='dataset directory')),
        (('--log_file',), dict(
            type=str, default='/path/to/log_file', help='log file')),
        (('--train_img_list_file',), dict(
            type=str, default='/path/to/train_image_list.txt',
            help='text file containing image files used for training')),
        (('--test_img_list_file',), dict(
            type=str, default='/path/to/test_image_list.txt',
            help='text file containing image files used for validation, test or feature extraction')),
        (('--meta_file',), dict(
            type=str,
            default='/tmp/face-hallucination/style/vgg-face/identity_meta.csv',
            help='meta file')),
        (('--checkpoint_dir',), dict(
            type=str, default='/path/to/checkpoint_directory',
            help='checkpoints directory')),
        (('--feature_dir',), dict(
            type=str, default='/path/to/feature_directory',
            help='directory where extracted features are saved')),
        (('-c', '--config'), dict(
            type=int, default=1, choices=configurations.keys(),
            help='the number of settings and hyperparameters used in training')),
        (('--batch_size',), dict(type=int, default=32, help='batch size')),
        (('--resume',), dict(type=str, default='', help='checkpoint file')),
        (('--weight_file',), dict(
            type=str,
            default='/tmp/face-hallucination/style/vgg-face/models/senet50_ft_weight.pkl',
            help='weight file')),
        (('--gpu',), dict(type=int, default=0)),
        (('-j', '--workers'), dict(
            default=4, type=int, metavar='N',
            help='number of data loading workers (default: 4)')),
        (('--horizontal_flip',), dict(
            action='store_true',
            help='horizontally flip images specified in test_img_list_file')),
    ]

    parser = argparse.ArgumentParser("PyTorch Face Recognizer")
    for flags, options in specs:
        parser.add_argument(*flags, **options)
    return parser


def main():
    """
    Command-line entry point: train, test or extract features.

    Parses the arguments, builds the data loaders and the ResNet/SENet model,
    restores weights (from a checkpoint when --resume is given, otherwise from
    --weight_file) and then runs the Trainer, Validator or Extractor selected
    by --cmd.
    """
    args = _build_arg_parser().parse_args()
    print(args)

    command = args.cmd
    is_training = command == 'train'

    if command == "extract":
        utils.create_dir(args.feature_dir)
    if is_training:
        utils.create_dir(args.checkpoint_dir)
    cfg = configurations[args.config]

    log_file = args.log_file
    resume = args.resume

    os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu)
    cuda = torch.cuda.is_available()
    if cuda:
        print("torch.backends.cudnn.version: {}".format(
            torch.backends.cudnn.version()))

    # Fixed seeds for reproducibility.
    torch.manual_seed(1337)
    if cuda:
        torch.cuda.manual_seed(1337)

    # 0. id label map
    id_label_dict = utils.get_id_label_map(args.meta_file)

    # 1. data loader
    root = args.dataset_dir
    loader_kwargs = {}
    if cuda:
        loader_kwargs = {'num_workers': args.workers, 'pin_memory': True}

    if is_training:
        train_set = datasets.VGG_Faces2(root, args.train_img_list_file,
                                        id_label_dict, split='train')
        train_loader = torch.utils.data.DataLoader(
            train_set, batch_size=args.batch_size, shuffle=True,
            **loader_kwargs)

    # The validation loader is built for every command.
    val_set = datasets.VGG_Faces2(root, args.test_img_list_file,
                                  id_label_dict, split='valid',
                                  horizontal_flip=args.horizontal_flip)
    val_loader = torch.utils.data.DataLoader(
        val_set, batch_size=args.batch_size, shuffle=False, **loader_kwargs)

    # 2. model
    # Feature extraction uses the network without its top layer.
    include_top = command != 'extract'
    build_model = ResNet.resnet50 if 'resnet' in args.arch_type else SENet.senet50
    model = build_model(num_classes=N_IDENTITY, include_top=include_top)

    start_epoch = 0
    start_iteration = 0
    if resume:
        checkpoint = torch.load(resume)
        model.load_state_dict(checkpoint['model_state_dict'])
        start_epoch = checkpoint['epoch']
        start_iteration = checkpoint['iteration']
        assert checkpoint['arch'] == args.arch_type
        print("Resume from epoch: {}, iteration: {}".format(
            start_epoch, start_iteration))
    else:
        utils.load_state_dict(model, args.weight_file)
        if is_training:
            model.fc.reset_parameters()

    criterion = nn.CrossEntropyLoss()
    if cuda:
        model = model.cuda()
        criterion = criterion.cuda()

    if is_training:
        # 3. optimizer: bias parameters get twice the learning rate and no
        # weight decay.
        weight_group = {'params': get_parameters(model, bias=False)}
        bias_group = {
            'params': get_parameters(model, bias=True),
            'lr': cfg['lr'] * 2,
            'weight_decay': 0,
        }
        optimizer = torch.optim.SGD(
            [weight_group, bias_group],
            lr=cfg['lr'],
            momentum=cfg['momentum'],
            weight_decay=cfg['weight_decay'])
        if resume:
            optimizer.load_state_dict(checkpoint['optim_state_dict'])

        # lr_policy: step
        last_epoch = start_iteration if resume else -1
        lr_scheduler = torch.optim.lr_scheduler.StepLR(
            optimizer, cfg['step_size'], gamma=cfg['gamma'],
            last_epoch=last_epoch)

        trainer = Trainer(
            cmd=command,
            cuda=cuda,
            model=model,
            criterion=criterion,
            optimizer=optimizer,
            lr_scheduler=lr_scheduler,
            train_loader=train_loader,
            val_loader=val_loader,
            log_file=log_file,
            max_iter=cfg['max_iteration'],
            checkpoint_dir=args.checkpoint_dir,
            print_freq=1,
        )
        trainer.epoch = start_epoch
        trainer.iteration = start_iteration
        trainer.train()
    elif command == 'test':
        validator = Validator(
            cmd=command,
            cuda=cuda,
            model=model,
            criterion=criterion,
            val_loader=val_loader,
            log_file=log_file,
            print_freq=1,
        )
        validator.validate()
    elif command == 'extract':
        extractor = Extractor(
            cuda=cuda,
            model=model,
            val_loader=val_loader,
            log_file=log_file,
            feature_dir=args.feature_dir,
            flatten_feature=True,
            print_freq=1,
        )
        extractor.extract()
# Build (and cache on disk) one feature sequence per video in the data set.
pbar = tqdm(total=len(data.data))
for video in data.data:
    # Target file for this video's sequence; numpy will auto-append .npy
    path = os.path.join('data', 'sequences',
                        video[2] + '-' + str(seq_length) + '-features')

    # Only do the work when the sequence has not been saved before.
    if not os.path.isfile(path + '.npy'):
        # All frames of the sample, downsampled to the ones we need.
        frames = data.rescale_list(data.get_frames_for_sample(video), seq_length)

        # One feature vector per frame, in frame order.
        sequence = [model.extract(image) for image in frames]

        np.save(path, sequence)

    pbar.update(1)

pbar.close()
# Build (and cache on disk) one feature sequence per frame folder listed in
# `sequences`; the output is saved next to the folder as <folder>.npy.
for path in sequences:
    # Only do the work when the sequence has not been saved before.
    if not os.path.isfile(path + '.npy'):
        # The folder's jpg frames in sorted order, downsampled to 40 frames.
        frames = data.rescale_list(
            sorted(glob.glob(os.path.join(path, '*jpg'))), 40)

        # One feature vector per frame, in frame order.
        sequence = [model.extract(image) for image in frames]

        # numpy will auto-append .npy
        np.save(path, sequence)

    pbar.update(1)

pbar.close()
class MainFrame(Frame):
    """
    Main window: pick a music directory, analyze it, pick a query file and
    search for similar songs.

    Analysis results are written as JSON into `jsondir`; search results are
    shown in a MusicList.
    """

    def __init__(self, parent):
        """Create the extractor and the four action buttons."""
        Frame.__init__(self, parent)
        self.parent = parent
        self.music_root = ''
        self.query_path = ''
        self.extractor = Extractor(n_frames=40, n_blocks=100,
                                   learning_rate=0.00053, verbose=True)
        self.style = Style()
        self.style.theme_use("default")

        padx = 2
        pady = 2
        # (label, handler) in top-to-bottom display order.
        actions = (
            ("Select a directory", self.set_music_root),
            ("Analyze", self.analyze),
            ("Select a file", self.set_query_path),
            ("Search similar songs", self.search_music),
        )
        for text, handler in actions:
            button = Button(self, text=text)
            button.pack(fill=tkinter.X, padx=padx, pady=pady)
            button.bind("<Button-1>", handler)

        self.pack(fill=BOTH, expand=1)

    def set_music_root(self, event):
        """Ask the user for the music directory to analyze."""
        # A cancelled dialog may return an empty tuple instead of '' on some
        # platforms; normalize so later code always sees a string.
        self.music_root = filedialog.askdirectory() or ''

    def analyze(self, event):
        """Extract features for the selected directory and dump them as JSON."""
        # Truthiness also rejects the empty tuple a cancelled dialog can give,
        # which the former `== ''` comparison let through.
        if not self.music_root:
            #TODO show error dialog
            print("Set a music directory first")
            return
        print("Analyzing")
        # NOTE(review): the second return value is ignored, as in the original.
        path_feature_map, _error = self.extractor.extract(self.music_root)
        print("Saving")
        filename = os.path.basename(self.music_root)
        jsonpath = os.path.join(jsondir, '{}.json'.format(filename))
        dump_json(path_feature_map, jsonpath)

    def set_query_path(self, event):
        """Ask the user for the query file, starting in the music directory."""
        self.query_path = (
            filedialog.askopenfilename(initialdir=self.music_root) or '')

    def search_music(self, event):
        """Search songs similar to the query file and list their paths."""
        if not self.query_path:
            #TODO show error dialog
            print("Set a music file first")
            return
        k_nearest = search(self.query_path)
        music_list = MusicList(self)
        for path, _vector in k_nearest:
            music_list.append(path)