Example #1
0
    def install(self):
        """Download, unpack and install this package unless already installed.

        The steps run in order: prepare, download the source, extract it,
        install dependencies, run the configured install commands, and
        finally record the result in the package database.
        """
        # Nothing to do when the database already marks the package installed.
        if hpakDB(self.pkg_name).get_value("status") == "installed":
            misc.print_error("%s - already installed!" % (self.pkg_name),
                             False)
            return

        self.prepare_install()
        download(self.options['source'], self.pkg_path, self.pkg_name).get()

        # Unpack the downloaded source.
        Extractor(self.options).extract()

        # Dependencies go in before the package's own install commands.
        self.install_dep()

        # The 'install' option is a comma-separated list of shell commands;
        # each one must finish before the next one starts.
        for shell_cmd in self.options['install'].split(','):
            subprocess.Popen(shell_cmd, shell=True).wait()

        # The install only counts as successful when the package directory
        # exists under HPAK_ROOT afterwards.
        install_dir = "%s/%s" % (HPAK_ROOT, self.options['dir'])
        if not os.path.exists(install_dir):
            misc.print_error(
                "%s-%s NOT installed, please try again." %
                (self.pkg_name, self.options['version']), True)
            return

        hpakDB(self.pkg_name).set_value("status", "installed")
        misc.print_success("%s installed." % (self.pkg_name))
Example #2
0
	def install(self):
		"""Install the package: fetch, extract, resolve dependencies, run commands.

		Returns early (after printing an error) when the package database
		already reports the package as installed.
		"""
		status_db = hpakDB(self.pkg_name)
		already_installed = status_db.get_value("status") == "installed"
		if already_installed:
			misc.print_error("%s - already installed!" % (self.pkg_name), False)
			return

		self.prepare_install()
		fetcher = download(self.options['source'], self.pkg_path, self.pkg_name)
		fetcher.get()

		# Unpack what was just downloaded.
		unpacker = Extractor(self.options)
		unpacker.extract()

		# Dependencies first, then the package's own commands.
		self.install_dep()

		# 'install' holds comma-separated shell commands, executed sequentially.
		for command in self.options['install'].split(','):
			process = subprocess.Popen(command, shell=True)
			process.wait()

		# Success is judged by the presence of the package dir under HPAK_ROOT.
		target = "%s/%s" % (HPAK_ROOT, self.options['dir'])
		if os.path.exists(target):
			status_db = hpakDB(self.pkg_name)
			status_db.set_value("status", "installed")
			misc.print_success("%s installed." % (self.pkg_name))
		else:
			misc.print_error("%s-%s NOT installed, please try again." % (self.pkg_name, self.options['version']), True)
Example #3
0
    def __init__(self, filename):
        """Run the extractor on *filename*, then load its rows as courses.

        Each line of the file is split on commas; parentheses and double
        quotes are stripped from both ends of every field, and the first
        seven fields are passed to ``Course``.
        """
        super(Database, self).__init__()
        Extractor.extract(filename)

        # Read everything up front so the file is closed before parsing.
        with open(filename, 'r') as handle:
            raw_rows = list(handle)

        self.courses = []
        for raw_row in raw_rows:
            fields = [field.strip('()"') for field in raw_row.split(',')]
            self.courses.append(
                Course(fields[0], fields[1], fields[2], fields[3],
                       fields[4], fields[5], fields[6]))
Example #4
0
def start(input_rom: str = None, output_folder: str = "./out") -> None:
    """Extract data from the input ROM.

    input_rom: str
        The path of the ROM to extract data from.
    output_folder: str
        The path where the data extracted from the ROM will be saved.
    """
    rom_container: NDS = NDS()
    rom_extractor = Extractor(
        nds=rom_container,
        output_folder=output_folder,
        input_rom=input_rom,
    )
    rom_extractor.extract()
Example #5
0
def extract_features(seq_length=40, class_limit=2, image_shape=(299, 299, 3)):
    """Extract and save a feature sequence for every video in the dataset.

    For each entry of ``DataSet.data`` the frames are rescaled to
    ``seq_length`` items, each frame is run through the extractor and the
    resulting list is saved with ``np.save``. Videos whose ``.npy`` file
    already exists are skipped.
    """
    data = DataSet(seq_length=seq_length, class_limit=class_limit, image_shape=image_shape)
    model = Extractor(image_shape=image_shape)

    sequences_dir = os.path.join('/content', 'Geriatrics_Data', 'Video', 'sequences')
    # np.save appends ".npy" on its own, so the stem carries no extension.
    stem_suffix = '-' + str(seq_length) + '-features'

    pbar = tqdm(total=len(data.data))
    for video in data.data:
        path = os.path.join(sequences_dir, video[2] + stem_suffix)

        # Only compute sequences that are not on disk yet.
        if not os.path.isfile(path + '.npy'):
            all_frames = data.get_frames_for_sample(video)
            frames = data.rescale_list(all_frames, seq_length)
            np.save(path, [model.extract(image) for image in frames])

        pbar.update(1)

    pbar.close()
Example #6
0
 def test_cond(self):
     """Extraction with the ``mask`` condition yields a non-empty result.

     Only the result stored under the first key of ``self.data`` is checked.
     """
     from masks import mask
     e = Extractor()
     logging.debug(e)
     e.add_feature_condition(mask)
     res = e.extract(self.data)
     # keys() is not subscriptable on Python 3; materialise it first so the
     # test runs unchanged on both Python 2 and Python 3.
     first_key = list(self.data.keys())[0]
     self.assertGreater(len(res[first_key]), 0)
Example #7
0
def extract_one_feature(video, frame_path, sequence_path, seq_length=400, feature_length=2048):
    """Extract and save the feature sequence for a single video.

    Frames are looked up as ``<frame_path>/<video>_*.jpg``. The function
    returns without doing anything when no frames are found or when the
    target ``<sequence_path>/<video>_<seq_length>.npy`` already exists.

    Args:
        video: Base name of the video (without extension).
        frame_path: Directory holding the extracted frame images.
        sequence_path: Directory the ``.npy`` sequence file is written to.
        seq_length: Length passed to ``rescale_list``.
        feature_length: Size of the zero vector used for ``-1`` entries.
    """
    img_list = glob.glob(os.path.join(frame_path, video + '_*.jpg'))
    if not img_list:
        return

    seqfile = os.path.join(sequence_path, video + '_' + str(seq_length) + '.npy')
    # Check if we already have it.
    if os.path.isfile(seqfile):
        return

    # Build the extractor only once we know there is work to do, so the
    # early-return paths above stay cheap.
    model = Extractor()

    frames = rescale_list(sorted(img_list), seq_length)

    sequence = []
    for image in frames:
        if image != -1:
            features = model.extract(image)
        else:
            # Entries equal to -1 are replaced by a zero vector.
            # NOTE(review): shape is (feature_length, 1); confirm this matches
            # the shape returned by model.extract().
            features = np.zeros((feature_length, 1))
        sequence.append(features)

    # Save the sequence.
    np.save(seqfile, sequence)
Example #8
0
def extract(data, seq_length, video_name):
    """Return the extracted feature sequence for one video of the dataset.

    Args:
        data: Dataset object exposing ``data`` (rows), ``get_frames_for_sample``
            and ``rescale_list``.
        seq_length: Number of frames the frame list is rescaled to.
        video_name: Value matched against index 2 of each dataset row.

    Returns:
        ``np.asarray`` of the per-frame features.

    Raises:
        ValueError: If no row matches ``video_name``.
    """
    # Validate the request before building the model so a bad name fails
    # fast instead of paying for model construction first.
    sample = next((row for row in data.data if row[2] == video_name), None)
    if sample is None:
        raise ValueError("Couldn't find sample: %s" % video_name)

    model = Extractor()

    # Get the frames for this video and downsample to the ones we need.
    frames = data.rescale_list(data.get_frames_for_sample(sample), seq_length)

    return np.asarray([model.extract(image) for image in frames])
Example #9
0
def extract_features(params, input_):
    """Build an ``Extractor`` from *params* and run it on *input_*.

    Every parameter is read as ``params[key][0]``. For the optional ones the
    literal string ``'None'`` is translated to ``None``.

    Returns:
        A tuple ``(output, layer)`` with the extractor output and the layer
        setting that was used.

    Raises:
        Exception: If *input_* does not exist on disk.
    """
    if not os.path.exists(input_):
        raise Exception("Input not found")

    def first_or_none(key):
        # The string 'None' is the "not set" marker for optional parameters.
        value = params[key][0]
        return None if value == 'None' else value

    model_def = params['model_file'][0]
    pretrained_model = params['pretrained_model'][0]

    # (key in params, keyword accepted by Extractor), in the original order.
    optional = {}
    for key, keyword in (('img_dims', 'image_dims'),
                         ('mean', 'mean'),
                         ('input_scale', 'input_scale'),
                         ('raw_scale', 'raw_scale'),
                         ('channel_swap', 'channel_swap'),
                         ('layer', 'layer')):
        optional[keyword] = first_or_none(key)

    extractor = Extractor(model_def, pretrained_model, **optional)
    return extractor.extract(input_), optional['layer']
Example #10
0
def extract_features():
    """Extract per-frame features for ``VIDEO_PATH`` and save them as text.

    The output file name is the video's file name with its ``.xxx``
    extension replaced by ``-features.txt``. When that file already exists
    nothing is computed. Otherwise frames are produced with ``get_frames``,
    collected via a ``*.jpg`` glob derived from ``VIDEO_PATH``, down-sampled
    to at most ``SEQ_LENGTH`` frames and run through the extractor.
    """
    vid_name = VIDEO_PATH.split('/')[-1]
    seq_path = re.sub(r'\.\w{3}', '-features.txt', vid_name)

    if os.path.isfile(seq_path):
        print("Feature vector text file already exists for %s" % VIDEO_PATH)
        return

    get_frames(VIDEO_PATH)
    vid_frame_fmt = re.sub(r'\.\w{3}', '*.jpg', VIDEO_PATH)
    # NOTE(review): glob.glob returns paths in arbitrary order; confirm
    # whether the frames should be sorted before building the sequence.
    frames = glob.glob(vid_frame_fmt)

    if 0 < SEQ_LENGTH < len(frames):
        # Downsample to SEQ_LENGTH frames by taking every skip-th frame.
        # The previous code used skip = len(frames) and then discarded the
        # result, so no down-sampling ever happened.
        skip = len(frames) // SEQ_LENGTH
        frames = frames[::skip][:SEQ_LENGTH]

    # Build the model only when features actually need to be computed.
    model = Extractor()
    sequence = [model.extract(frame) for frame in frames]

    np.savetxt(seq_path, sequence)
    print("Sequence file saved to %s" % seq_path)
Example #11
0
def extract(inDir, seqName, dataDir, seqLength):
    """Extract features for every directory listed in ``DataSet.dataLowest``.

    For each directory, all files matching ``*png`` are processed in sorted
    order and the list of extracted features is saved with ``np.save`` to
    ``<directory>/<seqName>`` (``np.save`` appends ``.npy``).

    Args:
        inDir: Forwarded to ``DataSet``.
        seqName: Sequence name; forwarded to ``DataSet`` and ``Extractor``
            and used as the output file stem.
        dataDir: Forwarded to ``DataSet``.
        seqLength: Forwarded to ``DataSet``.
    """
    # Get the dataset and the model.
    data = DataSet(seqName, seqLength, inDir, dataDir)
    model = Extractor(seqName)

    for thisDir in data.dataLowest:
        # Path of the sequence file for this directory.
        npypath = os.path.join(thisDir, seqName)

        frames = sorted(glob.glob(os.path.join(thisDir, '*png')))
        sequence = [model.extract(image) for image in frames]

        # Save the sequence.
        np.save(npypath, sequence)
Example #12
0
def frames_to_features(frames):
    """Run every frame through the extractor and collect the features.

    At most ``frames_nb`` frames are used; any surplus is dropped from the
    end of the list. Returns the list of extracted features, one per frame.
    """
    total = len(frames)
    if total > frames_nb:
        # Cap the workload at the configured maximum number of frames.
        print("Only using first %d of %d frames" % (frames_nb, total))
        frames = frames[:frames_nb]

    print("Load Inception v3 network ...")
    cnn = Extractor()

    # Time only the extraction loop itself.
    timer_start()
    sequence = []
    for image in frames:
        print("Extracting features from", image)
        sequence.append(cnn.extract(image))
    timer_stop()

    print("Extracted features from %d frames" % len(frames))
    return sequence
Example #13
0
class IndexTrainer(object):
	"""Builds an inverted index from the descriptors of a set of images."""

	def __init__(self):
		"""Create the index, the bag-of-words model and a 'surf' extractor."""
		self.index = InvertedIndex()
		self.bow = Bow()
		self.extractor = Extractor('surf')
		# print(x) with a single argument behaves the same on Python 2 and 3.
		print(self.index.author)
		print(self.index.description)

	def load_feature(self, path='../models/feature.npy'):
		"""Load features from *path*, keeping at most the first 200000 rows.

		The loaded array is stored on ``self.features`` and returned.
		"""
		self.features = np.load(path)
		if len(self.features) > 200000:
			self.features = self.features[:200000]
		# Formatted so the output matches the former Python 2 print statement
		# (two spaces after the colon) on both Python 2 and Python 3.
		print("feature shape:  %s" % (self.features.shape,))
		return self.features

	def run(self, path):
		"""Index every image found under *path*.

		The bag-of-words model is loaded, the index is reset with its centers,
		and each image's descriptors are appended to the index. Timing is
		reported after every 1000 images.
		"""
		self.bow.load()
		self.index.reset(self.bow.centers)
		images = imutil.get_list_image(path)
		t = imutil.Timer(1)
		t.tic()
		for count, image in enumerate(images, 1):
			descriptors = self.extractor.extract(image)
			self.index.append(image, descriptors)
			if count % 1000 == 0:
				t.toc('finish 1000 images: ')
				t.tic()
def get_data(sample_num):
    """Sample images from every class folder and extract their features.

    For each folder under ``data/place365``, ``sample_num`` images are drawn
    at random (with replacement, so an image may be picked more than once).
    The label of an image is the index of its parent folder name in the
    sorted list of class folder names.

    Returns:
        A tuple ``(features, labels, sample_count, feature_length)`` where
        the first two are numpy arrays.

    Raises:
        ValueError: If a class folder contains no ``*.jpg`` file and
            ``sample_num`` is positive.
        IndexError: If no image was sampled at all.
    """
    # NOTE(review): an earlier comment referred to "places365" while the
    # path used here is data/place365 - confirm which spelling is correct.
    class_folders = glob.glob(os.path.join('data', 'place365', '*'))

    random_images = []
    for class_item in class_folders:
        images = glob.glob(os.path.join(class_item, '*.jpg'))
        for _ in range(sample_num):
            # Pick a random image of this class.
            random_images.append(images[random.randint(0, len(images) - 1)])
    random_images.sort()

    # Class names come from the same listing, so one glob is enough.
    classes = sorted(item.split(os.path.sep)[-1] for item in class_folders)

    # Get the feature extraction model.
    model = Extractor()

    sequence = []
    labels = []
    pbar = tqdm(total=len(random_images))
    try:
        for image in random_images:
            sequence.append(model.extract(image))
            labels.append(classes.index(image.split(os.path.sep)[-2]))
            pbar.update(1)
    finally:
        # Always release the progress bar, even if extraction fails.
        pbar.close()

    return np.array(sequence), np.array(labels), len(sequence), len(
        sequence[0])
Example #15
0
def run(args):
    """Convert every collected source file to XML and export extracted data.

    Args:
        args: Mapping with the keys ``"src"`` (source directory), ``"dst"``
            (output directory, created if missing) and ``"output"``
            (``"csv"`` or ``"xlsx"``; any other value writes no table).

    Raises:
        Exception: If the source directory does not exist.

    Failures while processing an individual file are logged and the file is
    skipped, so one bad input does not abort the whole run.
    """
    import logging

    log = logging.getLogger(__name__)

    src = os.path.abspath(args["src"])
    if not os.path.exists(src):
        raise Exception("Source directory (%s) does not exist." % (src))

    dst = os.path.abspath(args["dst"])
    os.makedirs(dst, exist_ok=True)

    model = Extractor()
    model.set_defs("./definitions/defs.json")
    for f in collect_files(src):
        # Drops the last four characters of the file name.
        # NOTE(review): presumably a ".pdf" suffix - confirm against collect_files.
        name = f[0:-4]
        src_file = os.path.join(src, f)
        xml_file = os.path.join(dst, name + ".xml")
        try:
            if not pdfToXML.convert_to_xml(["-o", xml_file, src_file]):
                continue
            blocks, kv, texts = Reader({"src": xml_file}).get_content()
            model.set_content({"texts": texts, "blocks": blocks, "kv": kv})
            df = model.extract()
            output_file = os.path.join(dst, name + ".%s" % args["output"])
            if args["output"] == "csv":
                df.to_csv(output_file)
            elif args["output"] == "xlsx":
                # Write to the output file; the previous code opened the
                # Excel writer on src_file and overwrote the input document.
                df.to_excel(output_file)
        except Exception:
            # Best effort per file: record the failure instead of hiding it.
            log.exception("Failed to process %s; skipping", src_file)
Example #16
0
def extract_and_conv(data, seq_length, video_name):
    """Extract features and last-conv-layer outputs for one video.

    Args:
        data: Dataset object exposing ``data`` (rows), ``get_frames_for_sample``
            and ``rescale_list``.
        seq_length: Number of frames the frame list is rescaled to.
        video_name: Value matched against index 2 of each dataset row.

    Returns:
        A tuple ``(frames, sequence, conv_sequence)``: the rescaled frame
        list, the per-frame features and the per-frame conv outputs.

    Raises:
        ValueError: If no row matches ``video_name``.
    """
    # Validate the request before building the model so a bad name fails
    # fast instead of paying for model construction first.
    sample = next((row for row in data.data if row[2] == video_name), None)
    if sample is None:
        raise ValueError("Couldn't find sample: %s" % video_name)

    model = Extractor()

    # Get the frames for this video and downsample to the ones we need.
    frames = data.rescale_list(data.get_frames_for_sample(sample), seq_length)

    sequence = []
    conv_sequence = []
    for image in frames:
        sequence.append(model.extract(image))
        # Get last conv layer output.
        conv_sequence.append(model.get_convout(image))

    return frames, sequence, conv_sequence
Example #17
0
    def Classify(self, request_iterator, context):
        """Classify a stream of frames, yielding one label per full sequence.

        Each incoming chunk is unpickled and run through the feature
        extractor. Once ``seq_length`` feature vectors have been collected
        they are passed to the saved model, and a ``humanaction_pb2.label``
        carrying up to ten (class, score) pairs is yielded. The buffer is
        then reset for the next sequence.
        """
        saved_model = 'data/checkpoints/lstm-features.037-0.131.h5'

        seq_length = 40
        class_limit = 10  # Number of classes to extract. Can be 1-101 or None for all.
        top_k = 10  # The label message carries message1..10 / class1..10.
        data = DataSet(seq_length=seq_length, class_limit=class_limit)
        modelE = Extractor()

        model = load_model(saved_model)
        sequence = []
        for Chunk in request_iterator:
            # NOTE(review): pickle.loads can execute arbitrary code when the
            # payload is crafted; confirm that every client is trusted or
            # switch to a safe serialization format.
            frame = pickle.loads(Chunk.Content)
            sequence.append(modelE.extract(frame))
            if len(sequence) < seq_length:
                continue

            print(np.shape(sequence))
            prediction = model.predict(np.expand_dims(sequence, axis=0))
            print(prediction)

            # Only the fields that actually have a prediction are set. The
            # previous code indexed ten list entries unconditionally and
            # raised IndexError whenever fewer predictions were available.
            fields = {}
            sorted_lps = data.print_class_from_prediction(
                np.squeeze(prediction, axis=0))
            if sorted_lps is not None:
                for i, class_prediction in enumerate(sorted_lps):
                    if i >= top_k or class_prediction[1] == 0.0:
                        break
                    print("%s: %.2f" %
                          (class_prediction[0], class_prediction[1]))
                    fields['message%d' % (i + 1)] = class_prediction[1]
                    fields['class%d' % (i + 1)] = class_prediction[0]

            yield humanaction_pb2.label(**fields)
            sequence = []
Example #18
0
 def test_monotony(self):
     """Extraction with the rising/falling monotony conditions yields results.

     Only the result stored under the first key of ``self.data`` is checked.
     """
     from masks import absolute_monotony as monotony
     e = Extractor()
     logging.debug(e)
     e.add_feature_condition(monotony.Raising)
     e.add_feature_condition(monotony.Falling)
     res = e.extract(self.data)
     logging.debug("res: \n%s", pprint.pformat(res))
     # keys() is not subscriptable on Python 3; materialise it first so the
     # test runs unchanged on both Python 2 and Python 3.
     first_key = list(self.data.keys())[0]
     self.assertGreater(len(res[first_key]), 0)
Example #19
0
 def extract(self):
     """Extract the posts of every category and hand them to the divider.

     For each category the file ``flashback<category>.json`` is extracted
     into ``extracted<category>.txt``; the value returned by the extractor is
     passed on to ``Divider.divide`` together with the output file name.
     """
     for category in self.categories:
         suffix = str(category)
         target_txt = 'extracted' + suffix + '.txt'
         extracted = Extractor.extract('flashback' + suffix + '.json',
                                       target_txt)
         dataset_divider.Divider.divide(target_txt, extracted)
Example #20
0
def main():
    """Entry point when executing from the command line.

    Parses ``--max``, ``--C``, ``--gamma`` and ``--kernel``, trains an SVC on
    the training set extracted from ``constants.MNIST_DATASET_PATH`` and
    pickles the trained model and its label encoder to the paths configured
    in ``constants``. Invalid arguments print the help message and return.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--max', help='The maximum number of data points to be used while training model. \
        if empty, the whole training set will be used.')
    parser.add_argument('--C', type=float, default=1.0, help='C parameter for SVC algorithm. \
        if empty, C will be set to 1.0.')
    parser.add_argument('--gamma', type=float, default=.01, help='Gamma parameter for SVC algorithm. \
        if empty, gamma will be set to 0.01.')
    parser.add_argument('--kernel', type=str, default='linear', help='Kernel for SVC algorithm. \
        if empty, linear kernel will be used. Only linear and rbf kernel are supported at the moment.')
    args = parser.parse_args()

    # Only the linear and rbf kernels are supported. The previous check
    # compared against 'kernel' instead of 'linear', which rejected the
    # default value and made every default invocation print the help text.
    if (args.max and not is_number(args.max)) or \
            args.kernel not in ('linear', 'rbf'):
        print(constants.TRAINER_HELP_MSG)
        return

    try:
        extractor = Extractor()

        # Extract train set from archive file
        train_set, _, _ = extractor.extract(constants.MNIST_DATASET_PATH)

        if args.max:
            limit = int(args.max)
            feature_set = train_set[0][:limit]
            label_set = train_set[1][:limit]
        else:
            feature_set = train_set[0]
            label_set = train_set[1]

        label_encoder = LabelEncoder()
        labels = label_encoder.fit_transform(label_set)

        # Use SVC to train a recognition model
        recognizer = SVC(C=args.C, gamma=args.gamma, kernel=args.kernel, probability=True)
        recognizer.fit(feature_set, labels)

        # Write trained model and label encoder to file
        with open(constants.MODEL_FILE_PATH, 'wb') as f:
            pickle.dump(recognizer, f)
        with open(constants.LABEL_ENCODER_FILE_PATH, 'wb') as f:
            pickle.dump(label_encoder, f)

        print('Training done')

    except MemoryError:
        # The training dataset is quite big, more than 4GB of RAM and python 64 bit is required
        print('An memory error has occurred, please check if you have enough memory \
                and you are using python 64bit')

    except Exception as e:
        print('An error has occurred')
        print(str(e))
Example #21
0
class Extraktor(object):
    """Consumes crawl results from SQS, extracts links and records URLs."""

    def __init__(self):
        """Create the extractor and the AWS clients/resources it needs."""
        self.extractor = Extractor()
        self.sqs = boto3.client('sqs')
        self.queue_url = 'https://sqs.ap-southeast-1.amazonaws.com/841662669278/crawler'
        self.s3 = boto3.client('s3')
        self.dynamodb = boto3.resource('dynamodb')
        self.bloom_filter = MyBloomFilter(self.dynamodb.Table('link'))

    def process(self):
        """Poll the queue forever and process every received message.

        Each message body is an S3 key in the ``samuel-html`` bucket. The
        object is LZO-decompressed and parsed as JSON. For packs with code
        200 the links are extracted, links for which the bloom filter's
        ``add`` returns a falsy value are seeded, and the URL is stored in
        the ``link`` table. The message is deleted from the queue afterwards.
        """
        while True:
            received = self.sqs.receive_message(
                QueueUrl=self.queue_url,
                MaxNumberOfMessages=10,
                WaitTimeSeconds=1
            )

            if 'Messages' not in received:
                continue

            for msg in received['Messages']:
                key = msg['Body']
                record = self.s3.get_object(Bucket='samuel-html', Key=key)
                # record['Body'] is a botocore.response.StreamingBody
                pack = json.loads(lzo.decompress(record['Body'].read()).decode('utf-8'))

                self.bloom_filter.add(pack['url'])
                code = pack.get('code')
                if code == 200:
                    url = pack['url']
                    extracted = self.extractor.extract(pack)
                    for link in extracted['links']:
                        # NOTE(review): presumably add() returns a truthy
                        # value for URLs that were already seen - confirm
                        # against MyBloomFilter.
                        if not self.bloom_filter.add(link['url']):
                            seed(link)
                    # Save the URL to the link table.
                    now = Decimal(str(time.time()))
                    self.dynamodb.Table('link').put_item(
                        Item={
                            'url': url,
                            'ctime': now,
                            'utime': now
                        }
                    )
                    logger.info("%s ok", pack['url'])
                else:
                    # %s instead of %d: code is None when the pack has no
                    # 'code' key, and %d would raise TypeError before the
                    # message below could be deleted.
                    logger.warning("%s not ok code:%s", pack['url'], code)
                self.sqs.delete_message(
                    QueueUrl=self.queue_url,
                    ReceiptHandle=msg['ReceiptHandle']
                )
def extract_full_features(weights, seq_length=40):
    """Extract features for every frame of every video in the dataset.

    Unlike the down-sampling variants, all frames returned by
    ``get_frames_for_sample`` are used. Sequences are saved under
    ``data/sequences_test`` and videos whose ``.npy`` file already exists
    are skipped.

    Args:
        weights: Passed to ``Extractor``.
        seq_length: Passed to ``DataSet`` and used in the output file name.
    """
    # class_limit=None: no limit on the number of classes.
    data = DataSet(seq_length=seq_length, class_limit=None, check_dir='data/check')
    model = Extractor(weights)

    print(data.data)
    output_dir = os.path.join('data', 'sequences_test')
    pbar = tqdm(total=len(data.data))
    for video in data.data:
        # np.save appends ".npy" on its own.
        path = os.path.join(output_dir, video[2] + '-' + str(seq_length) + '-features')

        if os.path.isfile(path + '.npy'):
            pbar.update(1)
            continue

        # Build the sequence from every frame; nothing is down-sampled.
        sequence = [model.extract(image)
                    for image in data.get_frames_for_sample(video)]

        # Create the output directory right before the first save.
        if not os.path.exists(output_dir):
            os.mkdir(output_dir)
        np.save(path, sequence)

        pbar.update(1)

    pbar.close()
class Schema():
    """Feeds the Collator and Extractor output to the Parser.

    The parsed result is a list of JSON-ready items that are written to disk.
    """

    def __init__(self, database):
        self._collator = Collator(database)
        self._extractor = Extractor()
        self._parser = Parser()

    def generate(self, path):
        """Group, extract, parse and save the schema files under *path*."""
        node_groups = self._collator.grouping_nodes()
        print('\n ---- Done Grouping Nodes ----')
        Schema.print_grouping(node_groups)

        relationship_groups = self._collator.grouping_relationships()
        print('\n ---- Done Grouping Relationships ----')
        Schema.print_grouping(relationship_groups)

        extracted_nodes = self._extractor.extract(node_groups)

        print('\n ---- Done Extracting ----')
        combined = {**extracted_nodes, **relationship_groups}
        Schema.print_grouping(combined)

        parsed_items = self._parser.parse(extracted_nodes, relationship_groups)
        self._save(path, parsed_items)

    def _save(self, path, parsed_list):
        """Write each parsed item to ``<path>/<item['$id']>`` as JSON."""
        if not os.path.exists(path):
            os.makedirs(path)

        target_dir = Path(path)
        for entry in parsed_list:
            with open(target_dir / entry['$id'], 'w') as out_file:
                json.dump(entry, out_file, indent=4)

    @staticmethod
    def print_grouping(grouping):
        """Print key, properties and optional sections of every group."""
        optional_sections = (('relationships', 'Relationships: '),
                             ('allOf', 'allOf: '))
        for key in grouping:
            print('\nKey: ' + str(key))
            group = grouping[key]
            print('Properties: ' + str(group['props']))
            for section, label in optional_sections:
                if section in group:
                    print(label + str(group[section]))
Example #24
0
def extractor_features(data_file,
                       sequences_dir,
                       seq_length,
                       pretrained_model=None,
                       layer_name=None,
                       size=(150, 150)):
    """Extract a feature sequence per video and save it as a text file.

    The output directory is created when missing. Videos whose
    ``<name>-<seq_length>-features.txt`` file already exists are skipped.
    ``class_limit`` and ``FILE_INDEX`` are not parameters of this function;
    they are read from the enclosing scope.
    """
    if not os.path.exists(sequences_dir):
        os.makedirs(sequences_dir)

    data = DataSet(data_file,
                   sequences_dir,
                   seq_length=seq_length,
                   class_limit=class_limit)
    model = Extractor(pretrained_model, layer_name, size)

    pbar = tqdm(total=len(data.data))
    for video in data.data:
        path = sequences_dir + '/' + video[FILE_INDEX] + '-' + str(
            seq_length) + '-features.txt'

        # Only compute sequences that are not on disk yet.
        if not os.path.isfile(path):
            all_frames = data.get_frames_for_sample(video)
            frames = data.rescale_list(all_frames, seq_length)
            sequence = [model.extract(image) for image in frames]

            # One row per frame, features flattened into the columns.
            np.savetxt(path, np.array(sequence).reshape((seq_length, -1)))

        pbar.update(1)
    pbar.close()
Example #25
0
def extract_features():
    """Extract and save a 30-frame feature sequence for every video.

    Sequences are written below ``<model_path>/data/sequences``; videos whose
    ``.npy`` file already exists are skipped.
    """
    seq_length = 30
    # class_limit=None: no limit on the number of classes.
    data = DataSet(seq_length=seq_length, class_limit=None)

    model = Extractor()
    print(data.data)

    pbar = tqdm(total=len(data.data))
    for video in data.data:
        # np.save appends ".npy" on its own.
        stem = video[1] + '-' + str(seq_length) + '-features'
        path = os.path.join(model_path, 'data', 'sequences', stem)

        # Only compute sequences that are not on disk yet.
        if not os.path.isfile(path + '.npy'):
            all_frames = data.get_frames_for_sample(video)
            frames = data.rescale_list(all_frames, seq_length)
            np.save(path, [model.extract(image) for image in frames])

        pbar.update(1)

    pbar.close()
Example #26
0
def extract_feature(video_path='data/video', frame_path='data/frame', sequence_path='data/sequence',
                    seq_length=400, feature_length=2048):
    """Extract and save a feature sequence for every ``*.mp4`` video.

    Args:
        video_path: Directory searched for ``*.mp4`` files.
        frame_path: Directory holding frames named ``<video>_*.jpg``.
        sequence_path: Output directory (created when missing).
        seq_length: Length passed to ``rescale_list``; part of the file name.
        feature_length: Size of the zero vector used for ``-1`` entries.

    Videos without frames and videos whose sequence file already exists are
    skipped; the progress bar advances for every video either way.
    """
    if not os.path.exists(sequence_path):
        # makedirs also creates missing parent directories.
        os.makedirs(sequence_path)

    video_name = glob.glob(os.path.join(video_path, '*.mp4'))
    # Base name up to the first dot.
    video_name_noext = [name.split(os.path.sep)[-1].split('.')[0] for name in video_name]

    pbar = tqdm(total=len(video_name_noext))
    model = Extractor()
    for video in video_name_noext:
        img_list = glob.glob(os.path.join(frame_path, video + '_*.jpg'))
        if not img_list:
            # Count skipped videos too, otherwise the bar never completes.
            pbar.update(1)
            continue

        seqfile = os.path.join(sequence_path, video + '_' + str(seq_length) + '.npy')
        # Check if we already have it.
        if os.path.isfile(seqfile):
            pbar.update(1)
            continue

        frames = rescale_list(sorted(img_list), seq_length)

        sequence = []
        for image in frames:
            if image != -1:
                features = model.extract(image)
            else:
                # Entries equal to -1 are replaced by a zero vector.
                features = np.zeros((feature_length, ), dtype='float32')
            sequence.append(features)

        # Save the sequence.
        np.save(seqfile, sequence)

        pbar.update(1)
    pbar.close()
Example #27
0
class Thermometer(object):
    """Scrapes concert reviews for a list of performers and stores them."""

    def __init__(self):
        """Load the performer list and create the helpers and DB session."""
        # Use a context manager so the file handle is closed promptly.
        with open('../lib/performers.csv', 'r') as performers_file:
            artists = performers_file.readlines()
        # Only the first line is used.
        # NOTE(review): the last name keeps the line's trailing newline, if
        # any - confirm whether it should be stripped.
        self.artists = artists[0].split(',')
        self.googler = Googler()
        self.extractor = Extractor()
        self.session = Session()

    def extract_all_reviews(self):
        """Scrape reviews for every performer in ``self.artists``."""
        for a in self.artists:
            self.extract_concert_reviews_for_performer(a)

    def extract_concert_reviews_for_performer(self, performer):
        """Store up to two reviews for *performer*.

        Performers that already have at least two stored reviews are
        skipped, as are URLs that are already in the database. After each
        stored review the method sleeps for ten seconds.
        """
        existing = db.get_review_by_teamband_name(self.session, performer)
        if existing.count() >= 2:
            # print(x) with one argument behaves the same on Python 2 and 3.
            print(performer + " was already in the database")
            return

        print("now scraping " + performer)

        urls = self.googler.google_concert_reviews_urls(performer +
                                                        " concert reviews")

        for url in urls[:2]:
            if db.get_review_by_url(self.session, url).count() != 0:
                continue
            response = requests.get(url)
            tree = BeautifulSoup(response.text)
            most_likey_review = self.extractor.extract(tree)

            r = Review(teamband_name=performer,
                       url=url,
                       review=most_likey_review)
            self.session.add(r)
            self.session.commit()

            time.sleep(10)
def get_data(data_type):
    """Extract features and labels for every image of a data split.

    Images are collected with the glob ``data/<data_type>/**/*.jpg`` and
    processed in sorted order. The label of an image is the index of its
    parent folder name in the sorted list of entries of ``data/<data_type>``.

    Returns:
        A tuple ``(features, labels, sample_count, feature_length)`` where
        the first two are numpy arrays.

    Raises:
        IndexError: If no image was found.
    """
    images = sorted(glob.glob(os.path.join('data', data_type, '**', '*.jpg')))

    classes = glob.glob(os.path.join('data', data_type, '*'))
    classes = sorted([item.split(os.path.sep)[-1] for item in classes])

    # Get the feature extraction model.
    model = Extractor()

    sequence = []
    labels = []
    pbar = tqdm(total=len(images))
    try:
        for image in images:
            sequence.append(model.extract(image))
            labels.append(classes.index(image.split(os.path.sep)[-2]))
            pbar.update(1)
    finally:
        # Always release the progress bar, even if extraction fails.
        pbar.close()

    return np.array(sequence), np.array(labels), len(sequence), len(
        sequence[0])
Example #29
0
def crawl_school_programs(data):
  """Fill in the ``'text'`` field of program records by downloading their pages.

  Every record is pretty-printed. A record that already has a ``'text'`` key
  is passed through untouched; otherwise its ``'url'`` is requested and, on
  an HTTP 200 answer, the extracted text is stored under ``'text'`` unless it
  is empty or whitespace only. Records are modified in place.

  Args:
    data: Iterable of dict-like program records.

  Returns:
    A list holding every record of ``data`` in the original order.
  """
  programs = []

  for program in data:
    pprint(program)
    if 'text' not in program:
      url = program['url']

      print('requesting url %s ...' % url)
      # NOTE(review): verify=False disables TLS certificate verification.
      r = requests.get(url, verify=False)
      if r.status_code == 200:
        text = Extractor().extract(r.text)
        if text.strip():
          program['text'] = text
      else:
        print('Error code')

    programs.append(program)

  return programs
Example #30
0
	def spider(self, root, pages = True, subcategories = True, action = "traverse", preclean = False, depth = 1):
		if preclean: self.graphdb.clear()
		seen_key = "URL_SEEN"
		queue_key = "URL_QUEUE"
		ex = Extractor()
		batch = neo4j.WriteBatch(self.graphdb)

		queue_empty = lambda: self.fdb.scard(queue_key) == 0
		seen = lambda x: self.fdb.sismember(seen_key, x)
		visit = lambda x: self.fdb.sadd(seen_key, x)
		dequeue = lambda: self.fdb.spop(queue_key)
		enqueue = lambda x: self.fdb.sadd(queue_key, self._encode_str(x))

		if action == "traverse":
			enqueue(root)
			while not queue_empty():
				current = dequeue()
				print current
				if current and current.strip() and not seen(current):
					visit(current)
					result = ex.getAllFromCategory(current)
					self.updateBatch(batch, type = neo4j.Node, node = {'name': current, 'class': self.CATEGORY})
					if pages:
						for page in result['pages']:
							print "{0}\tp:{1}".format(current[:15], page)
							self.incr_rel(page, current, self.CATEGORY_REL)
							self.updateBatch(batch, type = neo4j.Node, node = {'name': page, 'class': self.ARTICLE})
							links = ex.getWikiLinks(page)
							for a in links:
								print "{0}\tp:{1}\t{2}".format(current[:15], page, a)
								self.incr_rel(a, page, self.SIBLING_REL)
								self.updateBatch(batch, type = neo4j.Node, node = {'name': a, 'class': self.ARTICLE})
					if subcategories:
						for subcat in result['categories']:
							print "{0}\tc:{1}".format(current, subcat)
							self.incr_rel(subcat, current, self.SUBCAT_REL)
							self.updateBatch(batch, type = neo4j.Node, node = {'name': subcat, 'class': self.CATEGORY})
							enqueue(subcat)
		elif action == "crawl":
			enqueue(root)
			while not queue_empty():
				topic = dequeue()
				if topic and topic.strip() and not seen(topic):
					visit(topic)
					result = ex.extract(topic)
					depth -= 1
					self.updateBatch(batch, type = neo4j.Node, node = {'name': topic, 'class': result['type']})
					if result['type'] == self.CATEGORY:
						pass
					elif result['type'] == self.ARTICLE:
						for a in result['links']:
							self.incr_rel(a, topic, self.SIBLING_REL)
							print "adding: ", a
							self.updateBatch(batch, type = neo4j.Node, node = {'name': a, 'class': self.ARTICLE})
							if depth > 0: enqueue(a)
						for c in result['categories']:
							self.incr_rel(a, topic, self.CATEGORY_REL)
							self.updateBatch(batch, type = neo4j.Node, node = {'name': c, 'class': self.CATEGORY})
					elif result['type'] == self.DISAMBIGUATION:
						for a in result['links']:
							self.incr_rel(a, topic, self.DISAMB_REL)
							self.updateBatch(batch, type = neo4j.Node, node = {'name': a, 'class': self.DISAMBIGUATION})
		print "FINISHED WITH THE NODES..."
		for k in self.fdb.smembers(self.rel_key):
			print "REL:", k
			try:
				nodes = k.split(":", 2)
				rel = nodes[0]
				n1 = self.node_index.get('name', nodes[1])[0]
				n2 = self.node_index.get('name', nodes[2])[0]
				self.updateBatch(batch, type = neo4j.Relationship, rel = {'node1': n1, 'rel': rel, 'weight': 1, 'node2': n2})
			except Exception as e:
				print "REL EXCEPTION: ", e
		print "DONE>>>>>>>>>>>>>>>"
Example #31
0
File: main.py Project: iskay/dxf
DXF_DIRECTORY = "./dxf/"
CSV_DIRECTORY = "./csv/"

ext = Extractor()

# For each dxf file in ./dxf/ generate one csv file with the same name
# in ./csv/
processed = 0
# List the directory once so the summary counts exactly the files iterated.
dxf_filenames = os.listdir(DXF_DIRECTORY)
for filename in dxf_filenames:
    dxf_filepath = os.path.join(DXF_DIRECTORY, filename)
    # try to open the file, skip it if it is not a valid dxf
    if ext.open_dxf(dxf_filepath):
        try:
            # extract pole coordinates/labels
            ext.extract()
            # write the csv; splitext only drops the last extension, so
            # "a.b.dxf" becomes "a.b.csv" instead of colliding with "a.csv"
            csv_filepath = os.path.splitext(filename)[0] + ".csv"
            csv_filepath = os.path.join(CSV_DIRECTORY, csv_filepath)
            ext.write_csv(csv_filepath)
            processed += 1
        except Exception:
            # Keep going with the next file, but let Ctrl-C / SystemExit through.
            print(f"Error processing file {dxf_filepath}!")

    else:
        print(f"Could not open {dxf_filepath}, skipping...")

print(
    f"Complete. Successfully processed {processed} of {len(dxf_filenames)} files."
)
Example #32
0
class Crawler:
  """
  Main class for this dummy crawler.

  Keeps the content of a JSON database file in memory as a
  ``SchoolCollection`` and adds programs (optionally with their page text)
  to the schools stored in it.
  """
  def __init__(self, dbfile):
    """Remember the database path; nothing is read until ``load`` is called."""
    self.dbfile = dbfile
    self.data = None
    self.school_collection = None
    self.extractor = Extractor()

  def load(self):
    """
    Read the JSON database into memory.
    Exits the process with status 1 when data is already loaded.
    """
    if self.data is not None:
      print('You have unsaved in-memory data, cannot load new data')
      exit(1)
    with open(self.dbfile, 'r') as f:
      self.data = json.load(f)
      self.school_collection = SchoolCollection(self.data['schools'])
    print('Loaded %s json file, got %d schools' % (self.dbfile,
        self.school_collection.get_num_schools()))

  def dump(self):
    """
    Serialize the school collection and overwrite the JSON database.
    Exits the process with status 1 when nothing was loaded.
    """
    if self.data is None:
      print('Nothing to dump')
      exit(1)
    self.data = self.school_collection.toJSON()
    with open(self.dbfile, 'w') as f:
      json.dump(self.data, f)
    print('Dumped %s json file' % self.dbfile)

  def fetch(self, url):
    """
    Entrance for all kinds of HTTP requests.

    Returns a pair (is_ok, html). is_ok is True only for an HTTP 200
    answer; otherwise html is None. Request errors are reported on stderr
    and turned into (False, None) instead of being raised.
    """
    is_ok, html = False, None
    try:
      # NOTE(review): verify=False disables TLS certificate verification.
      response = requests.get(url, verify=False)
      if response.status_code == 200:
        html = response.text
        is_ok = True
      else:
        sys.stderr.write('Error fetch\n')
    except Exception as exc:
      # Best effort, as before, but KeyboardInterrupt and SystemExit are no
      # longer swallowed the way the former "finally: return" did.
      sys.stderr.write('Error fetch: %s\n' % exc)
    return is_ok, html

  def fetch_program_text(self, url):
    """
    Just read the content from url, load <p> text only.
    I think this is the best heuristic method.

    Returns a pair (is_ok, text); text is None when the fetch failed.
    """
    is_ok, html = self.fetch(url)
    if not is_ok or html is None:
      # Nothing was downloaded, so there is nothing to strip or extract.
      return is_ok, None
    text = self.extractor.extract(html.strip())
    return is_ok, text

  # important public API
  def add_program(self, school_name, data, fetch_text=True, override_program=False):
    """
    Try to add a program to program list
    Currently I dont take care about the return value

    school_name      -- the school must already be in the collection
    data             -- program description; data['name'] identifies it
    fetch_text       -- download the program page and store its text
    override_program -- insert even when the program already exists

    Always returns None.
    """
    if not self.school_collection.is_school_exists(school_name):
      sys.stderr.write("Should add school '%s' first\n" % school_name)
      return None

    school = self.school_collection.find_school(school_name)
    if school.is_program_exists(data['name']) and not override_program:
      return None

    prog = Program(data)
    if fetch_text:
      is_ok, text = self.fetch_program_text(prog.url)
      if is_ok:
        prog.text = text

    pprint(prog.toJSON())
    school.insert_program(prog)
    return None
Example #33
0
def show_webcam(mirror=False):
    """Classify the live webcam stream in chunks of ``seq_length`` frames.

    Each frame is shown in a preview window with the latest printed
    prediction drawn at its centre, then resized to 299x299 and passed to
    the feature extractor. Once a full chunk of features is collected, the
    saved model predicts on it and the class scores are printed.

    The loop ends when ESC is pressed in the preview window or when the
    camera stops delivering frames; the capture device and the window are
    released in either case.

    Args:
        mirror: Flip every frame horizontally before display and extraction.
    """
    # initialize the video stream, then allow the camera sensor to warm up
    print("[INFO] starting video stream...")
    saved_model = 'data/checkpoints/lstm-features.037-0.131.h5'
    vs = cv2.VideoCapture(-1)
    time.sleep(2)
    # Set defaults.
    seq_length = 40
    class_limit = 10  # Number of classes to extract. Can be 1-101 or None for all.
    data = DataSet(seq_length=seq_length, class_limit=class_limit)

    # get the models: per-frame feature extractor and the sequence classifier.
    modelE = Extractor()
    model = load_model(saved_model)

    # Text overlaid on the preview; empty until the first prediction.
    caption = ""
    try:
        while True:
            sequence = []
            for _ in range(seq_length):
                ret_val, frame = vs.read()
                if not ret_val:
                    # The stream ended: a partial chunk cannot be classified.
                    return
                if mirror:
                    frame = cv2.flip(frame, 1)

                width = np.size(frame, 1)
                height = np.size(frame, 0)
                # Draw on a copy, and before showing it, so the overlay is
                # visible and never ends up in the extractor's input.
                preview = frame.copy()
                # putText needs integer pixel coordinates.
                cv2.putText(preview, caption, (width // 2, height // 2),
                            cv2.FONT_HERSHEY_PLAIN, 1.0, (255, 0, 0),
                            thickness=1)
                cv2.imshow('my webcam', preview)
                if cv2.waitKey(1) == 27:
                    return  # esc to quit

                frame = cv2.resize(frame, (299, 299),
                                   interpolation=cv2.INTER_CUBIC)
                sequence.append(modelE.extract(frame))

            # Predict!
            print(np.shape(sequence))
            prediction = model.predict(np.expand_dims(sequence, axis=0))
            print(prediction)
            sorted_lps = data.print_class_from_prediction(
                np.squeeze(prediction, axis=0))
            for i, class_prediction in enumerate(sorted_lps):
                if i >= 10 or class_prediction[1] == 0.0:
                    break
                line = "%s: %.2f" % (class_prediction[0], class_prediction[1])
                print(line)
                # NOTE(review): as before, the overlay ends up holding the
                # LAST printed entry - confirm whether the first was meant.
                caption = line
    finally:
        vs.release()
        cv2.destroyAllWindows()
directories = os.listdir(dirrr)

# One label table is collected per directory and merged at the very end.
dframes = []
for directory in directories:
    print(directory)
    indir = os.path.join(dirrr, directory)

    labels = label(directory)
    dframes.append(labels)

    for video in tqdm(labels['Path']):
        print(video)
        # Drop the last four characters (presumably a ".ext" suffix).
        stem = video[:-4]
        invideo = os.path.join(indir, stem)
        # The output file name is flat: path separators become underscores.
        outvideo = os.path.join(output, stem.replace('/', '_'))

        # Features of all frames of this video, in sorted file-name order.
        seq = np.array([
            model.extract(os.path.join(invideo, frame))
            for frame in sorted(os.listdir(invideo))
        ])
        np.save(outvideo + '.npy', seq)

pd.concat(dframes).to_csv(maindir + '/Labels' + '/Final.csv', index=False)
Example #35
0
    # Build one output row per input row: image path, binary label, then the
    # extracted feature values.
    for read in read_data_list:

        # Create the labels for the binary and the multi-class case
        binary_label = int(float(read[1]))
        #if binary_label == 1:
        #    class_num += 1

        img_path = str(read[0])
        # Fix up the directory
        img_path = img_path.replace('./data', '/media/futami/HDD1/DATASET_KINGDOM/Scene')

        print(img_path)

        # Save the feature vector in numpy format
        # Select the feature extractor to use
        feature = model.extract(img_path, model_name)
        feature_shape = str(feature.shape)

        feature = feature.tolist()

        # Prepend the path and the label to the feature values.
        feature.insert(0, img_path)
        feature.insert(1, binary_label)
        #feature.insert(2, class_num)

        new_data_list.append(feature)

        print ('extra feature: ' + img_path)

    # save labeling as csv file
    with open(NEW_LABELING_DIR + basename , 'w') as f:
        writer = csv.writer(f)
Example #36
0
class ICLoop(QObject):
    """State machine driving the inject / extract cycle.

    The machine walks through the handlers listed in ``self.states``
    (idle, preinject, injecting, injected, preextract, extracting,
    extracted). ``nextState`` is connected to the "done" signals of the
    mode controller, the linac starter and the extractor.
    """

    stateChanged = pyqtSignal(str)
    icmodeChanged = pyqtSignal(str)

    # stage events
    preparing2inject = pyqtSignal()
    injecting = pyqtSignal()
    injected = pyqtSignal()
    preparing2extract = pyqtSignal()
    extracting = pyqtSignal()
    extracted = pyqtSignal()

    def __init__(self):
        """Create the helper objects, set defaults and wire up the signals."""
        QObject.__init__(self)

        self.linStarter = LinStarter()
        self.extractor = Extractor()
        self.modeCtl = modes.ModesClient()

        self.particles = 'e'  #  'e', 'p'
        self.stored_particles = None  # None, 'e', 'p'
        self.requested_particles = None  # None (means do not switch), 'e', 'p'
        self.beam_user = None  # None, 'v2', 'v4'
        self.requested_beam_user = None
        self.requested_runmode = None

        self.ic_runmode = 'manual'  # 'manual', 'single', 'round', 'auto'
        self.state = "idle"
        self.state_ind = 0

        # number of shots per injection, by particle type
        self.shots = {'e': 5, 'p': 50}

        # subsystems to switch injection-extraction
        self.kickers_subsys = [22, 18, 19]
        self.ic_subsys = [
            32, 17, 38, 52, 53, 54, 55, 29, 59, 61, 62, 63, 64, 65, 67, 68, 60,
            69, 70, 71, 72, 73, 66, 74, 75, 50, 3, 4, 51, 5, 6, 23, 30, 7, 8,
            9, 10, 11, 33, 37
        ]
        self.k500_subsys = [
            34, 24, 45, 12, 56, 13, 46, 14, 57, 15, 47, 43, 76, 58, 44, 48, 26,
            25, 49, 28, 27
        ]

        self.timer = QTimer()

        # Every "done" notification advances the state machine.
        self.modeCtl.markedReady.connect(self.nextState)
        self.linStarter.runDone.connect(self.nextState)
        self.extractor.extractionDone.connect(self.nextState)

        # Handlers indexed by state_ind.
        self.states = [
            self.__idle, self.__preinject, self.__injecting, self.__injected,
            self.__preextract, self.__extracting, self.__extracted
        ]

    # state machine switching conditions implementation
    def nextState(self):
        """Advance to the next state, run its handler and emit ``stateChanged``."""
        # NOTE(review): the comment below talks about manual operation, but
        # the test returns early for every mode EXCEPT "manual" - confirm
        # which of the two is intended before relying on inject()/execRound().
        if self.ic_runmode != "manual":
            # for manual operation - just proc requested stage if possible and stop
            self.stateChanged.emit(self.state)
            return

        # NOTE(review): state_ind is not bounds-checked here; a call made
        # while in the last state would index past self.states.
        self.state_ind += 1
        self.state = _states[self.state_ind]
        self.stateChanged.emit(self.state)
        self.states[self.state_ind]()

        if self.state == "injected" and self.ic_runmode in ["round", "auto"]:
            self.nextState()

        if self.state == 'extracted' and self.ic_runmode in ["round", "auto"]:
            # wrap around and start the next cycle
            self.state_ind = 0
            self.state = _states[self.state_ind]
            self.nextState()

    # state functions: what to do when proceeding to state

    def __idle(self):
        pass

    def __preinject(self):
        # check for requests (not implemented yet)
        if self.requested_particles:
            pass
        if self.requested_beam_user:
            pass

        self.linStarter.setRunmode(1)
        self.modeCtl.load_marked(mode_map[self.particles + 'inj'],
                                 self.kickers_subsys)

    def __injecting(self):
        self.linStarter.newCounterCycle(self.shots[self.particles])
        # after injection initiation - possible some particles already stored
        self.stored_particles = self.particles

    def __injected(self):
        pass

    def __preextract(self):
        self.modeCtl.load_marked(mode_map[self.particles + 'ext'],
                                 self.kickers_subsys)

    def __extracting(self):
        self.extractor.extract()

    def __extracted(self):
        # the particles are gone
        self.stored_particles = None

    # commands implementations -------------------------

    # not really correct... we need to initiate end of round,
    # extract beam if needed and then make changes to magnetic systems

    def setUseCase(self, particles, beam_user):
        """Prepare a switch to another particle type and/or beam user.

        A change of particle type extracts the beam (unless idle); a change
        of beam user alone only stops the current operation (unless idle)
        after remembering the current run mode.

        NOTE(review): the method is unfinished - the subsystem list and the
        remagnetisation paths are computed but not applied yet.
        """
        particles_changed = self.particles != particles
        user_changed = self.beam_user != beam_user
        if not particles_changed and not user_changed:
            # no changes
            return

        if particles_changed:
            # need to drop beam and change everything in magsys,
            # whether or not the beam user changes as well
            if self.state != 'idle':
                self.extract()
            mode_subsys = self.ic_subsys + self.k500_subsys
        else:
            # just beam user changed, possibly no changes to IC
            # need to initiate channels remag

            # stop beam if running, don't drop beam
            # (the attribute is ic_runmode; self.runmode does not exist)
            self.requested_runmode = self.ic_runmode
            if self.state != 'idle':
                self.stop()
            mode_subsys = self.k500_subsys

        start_mode = mode_num(self.particles, self.beam_user)
        target_mode = mode_num(particles, beam_user)

        mag_path = {
            name: mode_path_num(name, start_mode, target_mode)
            for name in remag_devs
        }

        # TODO: apply the change, e.g.
        #self.modeCtl.load_marked(mode, mode_subsys)

    def setLinRunMode(self, runmode):
        """Switch the linac starter run mode.

        ``runmode`` is either a key of ``runmodes`` or the mode value
        itself. Nothing happens when the starter already is in that mode;
        otherwise any running operation is stopped first.
        """
        if isinstance(runmode, str):
            mode_val = runmodes[runmode]
        else:
            mode_val = runmode
        if self.linStarter.runmode == mode_val:
            # no changes
            return
        if self.state != 'idle':
            # if in any automatic stages - go to idle state
            self.stop()
        # Pass the resolved value: it is what the comparison above uses.
        self.linStarter.setRunmode(mode_val)

    def setEshots(self, num):
        """Set the number of shots used for 'e' particles."""
        self.shots['e'] = int(num)

    def setPshots(self, num):
        """Set the number of shots used for 'p' particles."""
        self.shots['p'] = int(num)

    # stop any operation
    def stop(self):
        """Return to manual mode / idle state and stop counter and extraction."""
        self.ic_runmode = 'manual'
        self.state = 'idle'
        self.state_ind = 0
        self.icmodeChanged.emit(self.ic_runmode)
        self.linStarter.stopCounter()
        self.extractor.stopExtraction()

    def inject(self):
        """Start from idle in 'single' mode."""
        self.ic_runmode = 'single'
        self.state = 'idle'
        self.state_ind = 0
        self.icmodeChanged.emit(self.ic_runmode)
        self.nextState()

    def extract(self):
        """Jump to the 'injected' state in manual mode and continue from there."""
        self.ic_runmode = 'manual'
        self.state = 'injected'
        self.state_ind = 3
        self.icmodeChanged.emit(self.ic_runmode)
        self.nextState()

    def execRound(self):
        """Start from idle in 'round' mode."""
        self.ic_runmode = 'round'
        self.state = 'idle'
        self.state_ind = 0
        self.icmodeChanged.emit(self.ic_runmode)
        self.nextState()

    def execBurst(self):
        """Start from idle in 'auto' mode."""
        self.ic_runmode = 'auto'
        self.state = 'idle'
        self.state_ind = 0
        self.icmodeChanged.emit(self.ic_runmode)
        self.nextState()
Example #37
0
def extract():
    """Build a default ``Extractor`` and run its ``extract`` method."""
    Extractor().extract()
Example #38
0
def extract():
  """Build a default ``Extractor`` and run its ``extract`` method."""
  Extractor().extract()
Example #39
0
def main():
    """Parse the command line and run one of ``train``, ``test`` or ``extract``.

    A ResNet-50 or SENet-50 model is built according to ``--arch_type``,
    initialised from ``--resume`` (a checkpoint) or ``--weight_file``, and
    then handed to a ``Trainer``, ``Validator`` or ``Extractor``.
    """
    parser = argparse.ArgumentParser("PyTorch Face Recognizer")
    parser.add_argument('--cmd',
                        default='extract',
                        type=str,
                        choices=['train', 'test', 'extract'],
                        help='train, test or extract')

    parser.add_argument('--arch_type',
                        type=str,
                        default='senet50_ft',
                        help='model type',
                        choices=[
                            'resnet50_ft', 'senet50_ft', 'resnet50_scratch',
                            'senet50_scratch'
                        ])

    parser.add_argument('--dataset_dir',
                        type=str,
                        default='/tmp/Datasets/3Dto2D/squared/uniques',
                        help='dataset directory')

    parser.add_argument('--log_file',
                        type=str,
                        default='/path/to/log_file',
                        help='log file')
    parser.add_argument(
        '--train_img_list_file',
        type=str,
        default='/path/to/train_image_list.txt',
        help='text file containing image files used for training')
    parser.add_argument(
        '--test_img_list_file',
        type=str,
        default='/path/to/test_image_list.txt',
        help=
        'text file containing image files used for validation, test or feature extraction'
    )
    parser.add_argument(
        '--meta_file',
        type=str,
        default='/tmp/face-hallucination/style/vgg-face/identity_meta.csv',
        help='meta file')
    parser.add_argument('--checkpoint_dir',
                        type=str,
                        default='/path/to/checkpoint_directory',
                        help='checkpoints directory')
    parser.add_argument('--feature_dir',
                        type=str,
                        default='/path/to/feature_directory',
                        help='directory where extracted features are saved')
    parser.add_argument(
        '-c',
        '--config',
        type=int,
        default=1,
        choices=configurations.keys(),
        help='the number of settings and hyperparameters used in training')
    parser.add_argument('--batch_size',
                        type=int,
                        default=32,
                        help='batch size')
    parser.add_argument('--resume',
                        type=str,
                        default='',
                        help='checkpoint file')
    parser.add_argument(
        '--weight_file',
        type=str,
        default=
        '/tmp/face-hallucination/style/vgg-face/models/senet50_ft_weight.pkl',
        help='weight file')
    parser.add_argument('--gpu', type=int, default=0)
    parser.add_argument('-j',
                        '--workers',
                        default=4,
                        type=int,
                        metavar='N',
                        help='number of data loading workers (default: 4)')
    parser.add_argument(
        '--horizontal_flip',
        action='store_true',
        help='horizontally flip images specified in test_img_list_file')
    args = parser.parse_args()
    print(args)

    # Only the output directory needed by the selected command is created.
    if args.cmd == "extract":
        utils.create_dir(args.feature_dir)

    if args.cmd == 'train':
        utils.create_dir(args.checkpoint_dir)
        # cfg is bound for 'train' only; every later use sits behind the
        # same args.cmd == 'train' check.
        cfg = configurations[args.config]

    log_file = args.log_file
    resume = args.resume

    # The GPU selection is exported before CUDA availability is queried.
    os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu)
    cuda = torch.cuda.is_available()
    if cuda:
        print("torch.backends.cudnn.version: {}".format(
            torch.backends.cudnn.version()))

    # Fixed seed for the CPU generator and, when available, the CUDA one.
    torch.manual_seed(1337)
    if cuda:
        torch.cuda.manual_seed(1337)

    # 0. id label map
    meta_file = args.meta_file
    id_label_dict = utils.get_id_label_map(meta_file)

    # 1. data loader
    root = args.dataset_dir
    train_img_list_file = args.train_img_list_file
    test_img_list_file = args.test_img_list_file

    # Worker / pinned-memory options are passed to the loaders only with CUDA.
    kwargs = {'num_workers': args.workers, 'pin_memory': True} if cuda else {}

    if args.cmd == 'train':
        dt = datasets.VGG_Faces2(root,
                                 train_img_list_file,
                                 id_label_dict,
                                 split='train')
        train_loader = torch.utils.data.DataLoader(dt,
                                                   batch_size=args.batch_size,
                                                   shuffle=True,
                                                   **kwargs)

    # The validation loader is built for every command; it is also the
    # loader given to the Extractor below.
    dv = datasets.VGG_Faces2(root,
                             test_img_list_file,
                             id_label_dict,
                             split='valid',
                             horizontal_flip=args.horizontal_flip)
    val_loader = torch.utils.data.DataLoader(dv,
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             **kwargs)

    # 2. model
    # include_top is False only for the 'extract' command.
    include_top = True if args.cmd != 'extract' else False
    if 'resnet' in args.arch_type:
        model = ResNet.resnet50(num_classes=N_IDENTITY,
                                include_top=include_top)
    else:
        model = SENet.senet50(num_classes=N_IDENTITY, include_top=include_top)
    # print(model)

    start_epoch = 0
    start_iteration = 0
    if resume:
        # Continue from a checkpoint: weights plus epoch/iteration counters.
        checkpoint = torch.load(resume)
        model.load_state_dict(checkpoint['model_state_dict'])
        start_epoch = checkpoint['epoch']
        start_iteration = checkpoint['iteration']
        assert checkpoint['arch'] == args.arch_type
        print("Resume from epoch: {}, iteration: {}".format(
            start_epoch, start_iteration))
    else:
        # Start from the weight file; for training the fc layer's
        # parameters are reset afterwards.
        utils.load_state_dict(model, args.weight_file)
        if args.cmd == 'train':
            model.fc.reset_parameters()

    if cuda:
        model = model.cuda()

    criterion = nn.CrossEntropyLoss()
    if cuda:
        criterion = criterion.cuda()

    # 3. optimizer
    if args.cmd == 'train':
        # Two parameter groups: the bias group uses twice the base learning
        # rate and no weight decay.
        optim = torch.optim.SGD([
            {
                'params': get_parameters(model, bias=False)
            },
            {
                'params': get_parameters(model, bias=True),
                'lr': cfg['lr'] * 2,
                'weight_decay': 0
            },
        ],
                                lr=cfg['lr'],
                                momentum=cfg['momentum'],
                                weight_decay=cfg['weight_decay'])
        if resume:
            optim.load_state_dict(checkpoint['optim_state_dict'])

        # lr_policy: step
        # NOTE(review): an iteration counter is passed as StepLR's
        # last_epoch - presumably the scheduler is stepped per iteration;
        # confirm against Trainer.
        last_epoch = start_iteration if resume else -1
        lr_scheduler = torch.optim.lr_scheduler.StepLR(optim,
                                                       cfg['step_size'],
                                                       gamma=cfg['gamma'],
                                                       last_epoch=last_epoch)

    # 4. run the selected command
    if args.cmd == 'train':
        trainer = Trainer(
            cmd=args.cmd,
            cuda=cuda,
            model=model,
            criterion=criterion,
            optimizer=optim,
            lr_scheduler=lr_scheduler,
            train_loader=train_loader,
            val_loader=val_loader,
            log_file=log_file,
            max_iter=cfg['max_iteration'],
            checkpoint_dir=args.checkpoint_dir,
            print_freq=1,
        )
        trainer.epoch = start_epoch
        trainer.iteration = start_iteration
        trainer.train()
    elif args.cmd == 'test':
        validator = Validator(
            cmd=args.cmd,
            cuda=cuda,
            model=model,
            criterion=criterion,
            val_loader=val_loader,
            log_file=log_file,
            print_freq=1,
        )
        validator.validate()
    elif args.cmd == 'extract':
        extractor = Extractor(
            cuda=cuda,
            model=model,
            val_loader=val_loader,
            log_file=log_file,
            feature_dir=args.feature_dir,
            flatten_feature=True,
            print_freq=1,
        )
        extractor.extract()
pbar = tqdm(total=len(data.data))
for video in data.data:

    # Where this video's feature sequence lives (np.save appends .npy).
    path = os.path.join('data', 'sequences',
                        video[2] + '-' + str(seq_length) + '-features')

    # Only videos without a saved sequence go through extraction.
    if not os.path.isfile(path + '.npy'):
        # All frames of the sample, downsampled to seq_length entries.
        frames = data.rescale_list(data.get_frames_for_sample(video),
                                   seq_length)

        # One feature entry per remaining frame, in frame order.
        sequence = [model.extract(image) for image in frames]

        np.save(path, sequence)

    # Count the video whether it was processed or skipped.
    pbar.update(1)

pbar.close()
#for video in data.data:
for index, i in enumerate(sequences):

    # Each entry is a frame directory; its features go to "<entry>.npy".
    path = i

    # Only entries without a saved sequence go through extraction.
    if not os.path.isfile(path + '.npy'):
        # Sorted frame files of this entry, downsampled to 40 frames.
        frames = data.rescale_list(
            sorted(glob.glob(os.path.join(path, '*jpg'))), 40)

        # One feature entry per remaining frame, in frame order.
        sequence = [model.extract(image) for image in frames]

        # numpy appends the .npy suffix itself.
        np.save(path, sequence)

    # Count the entry whether it was processed or skipped.
    pbar.update(1)

pbar.close()
Example #42
0
class MainFrame(Frame): 
    def __init__(self, parent):
        """Create the frame, its feature extractor and the four action buttons."""
        Frame.__init__(self, parent)

        self.parent = parent

        # Nothing is selected until the user picks a directory / a file.
        self.music_root = ''
        self.query_path = ''
        self.extractor = Extractor(n_frames=40,
                                   n_blocks=100,
                                   learning_rate=0.00053,
                                   verbose=True)

        self.style = Style()
        self.style.theme_use("default")

        padx = 2
        pady = 2

        # Button caption -> left-click handler, in top-to-bottom order.
        actions = (
            ("Select a directory", self.set_music_root),
            ("Analyze", self.analyze),
            ("Select a file", self.set_query_path),
            ("Search similar songs", self.search_music),
        )
        for caption, handler in actions:
            button = Button(self, text=caption)
            button.pack(fill=tkinter.X, padx=padx, pady=pady)
            button.bind("<Button-1>", handler)

        self.pack(fill=BOTH, expand=1)

    def set_music_root(self, event):
        """Store the result of a directory-chooser dialog as the music root."""
        chosen_directory = filedialog.askdirectory()
        self.music_root = chosen_directory

    def analyze(self, event):
        if(self.music_root == ''):
            #TODO show error dialog 
            print("Set a music directory first")
            return

        print("Analyzing")
        path_feature_map, error = self.extractor.extract(self.music_root)

        print("Saving")
        filename = os.path.basename(self.music_root)
        jsonpath = os.path.join(jsondir, '{}.json'.format(filename))

        dump_json(path_feature_map, jsonpath)

    def set_query_path(self, event):
        """Store the result of a file-chooser dialog (opened at the music root) as the query path."""
        chosen_file = filedialog.askopenfilename(initialdir=self.music_root)
        self.query_path = chosen_file

    def search_music(self, event):
        """Search for songs related to the selected query file.

        Only prints a hint when no query file has been selected yet.
        """
        if(self.query_path == ''):
            #TODO show error dialog 
            print("Set a music file first")
            return
    
        # The search result is iterated as (path, vector) pairs below.
        k_nearest = search(self.query_path)

        # Only the paths are put into the list; the vectors are not used here.
        music_list = MusicList(self)
        for path, vector in k_nearest:
            music_list.append(path)