def main():
    """Entry point: configure logging, validate the VERA root path, then run.

    Returns 0 (and skips the run) when the configured VERA path is
    empty or fails the file-system check.
    """
    log.basicConfig(level=log.DEBUG if Config.debug else log.INFO)
    log.info('AI::VERA - Launching...')
    if Commons.isEnv(Config.vera):
        vera = Commons.getEnv(Config.vera)
        # Bail out on an empty value or a failed path check.
        # NOTE(review): checkPath(..., dir=False) appears to be truthy on
        # failure here - confirm against its definition.
        if not vera or checkPath(vera, dir=False):
            invalidPath(Config.vera, vera)
            return 0
    log.info('AI::VERA - is running file system check...')
    vera_setup.setup()
    run()
    log.info('AI::VERA - has terminated')
def run():
    """Convert every PDF in the directories given on the command line into
    multi-page grayscale TIFFs via Ghostscript.

    Directories come from sys.argv[1:]; the Ghostscript install directory
    comes from the GS_PATH environment entry (via Commons).  Existing
    output files are removed before conversion.
    """
    import subprocess  # local import keeps the fix self-contained

    ghostscript_path = Commons.getEnv('GS_PATH')
    gs_exe = path.join(ghostscript_path, 'gswin64c.exe')
    for p in sys.argv[1:]:
        if not path.exists(p):
            print('Error - path %s does not exist. Skipping...' % p)
            continue
        print('Processing dir %s' % p)
        for pdf_path in glob(path.join(p, '*.pdf')):
            head, tail = path.split(pdf_path)
            # %04d is expanded by Ghostscript into one file per page.
            tiff_path = path.join(head, tail.replace('.pdf', '-%04d.tiff'))
            print('Convert %s to TIFF %s' % (pdf_path, tiff_path))
            if path.exists(tiff_path):
                os.unlink(tiff_path)
            # @TODO: PAPERSIZE has to be the same as the source!!!
            # BUGFIX: os.popen built a shell string in which tiff_path and
            # pdf_path were space-joined into one token - any path with a
            # space broke the command (and allowed shell injection).  An
            # argument list with shell=False passes each path verbatim.
            subprocess.run([
                gs_exe, '-q', '-dNOPAUSE', '-dBATCH', '-r300',
                '-sDEVICE=tiffgray', '-sPAPERSIZE=a4',
                '-sOutputFile=%s' % tiff_path, pdf_path,
            ])
def get_model_template():
    """Return the template string for the configured model type.

    Reads the model metadata JSON under the VERA root and picks the first
    entry whose name contains Setup.model.  Returns '' when no entry
    matches, and None when the metadata file is missing.
    """
    vera = Commons.getEnv(Config.vera)
    model_path = path.join(vera, Setup.path_model, Setup.model_config)
    log.info('AI::VERA - Loading model metadata for type <<< %s >>>', Setup.model.upper())
    if not path.exists(model_path):
        # @TODO: handle differently to indicate that initialization failed
        log.error('AI::VERA - Cannot find model metadata file %s', model_path)
        return
    # BUGFIX: context manager closes the metadata file - the bare
    # json.load(open(...)) leaked the handle.
    with open(model_path, 'r') as fp:
        meta = json.load(fp)
    template = ''
    for m in meta['models']:
        if Setup.model in m['name']:
            template = m['template']
            break
    return template
def gen_vocab():
    """Run every training TIFF through the image and raw processors,
    then build the vocabulary over the whole training directory.
    """
    train_path = path.join(Commons.getEnv(Config.vera), Setup.path_train)
    for img_file in glob(path.join(train_path, '*.tiff')):
        head, tail = path.split(img_file)
        if '.tiff' in tail:
            tail = tail.replace('.tiff', '-raw.ai')
        # @FIXME: needs to work with multiple extensions
        raw_file = join(head, tail)
        ProcessImage().process_img(img_file, raw_file)

        head, tail = path.split(raw_file)
        if '-raw.ai' in tail:
            tail = tail.replace('-raw.ai', '-vera.ai')
        vera_file = join(head, tail)
        ProcessRaw().process_raw(raw_file, vera_file)
    # After all the images have been processed - generate the vocabulary
    ProcessRaw().process_vocab(train_path)
def __init__(self):
    """Load model metadata (classes, vocab, model) for the configured type.

    Reads the model-config JSON under the VERA root and copies the fields
    of the first entry whose name contains Setup.model onto the instance.
    If the file is missing, logs an error and returns with the attributes
    unset (callers then hit AttributeError - see @TODO).
    """
    log.info('AI::VERA - Instance %s has been created', type(self).__name__)
    vera = Commons.getEnv(Config.vera)
    model_path = path.join(vera, Setup.path_model, Setup.model_config)
    log.info('AI::VERA - Loading model metadata for type <<< %s >>>', Setup.model.upper())
    if not path.exists(model_path):
        # @TODO: handle differently to indicate that initialization failed
        log.error('AI::VERA - Cannot find model metadata file %s', model_path)
        return
    # BUGFIX: context manager closes the metadata file - the bare
    # json.load(open(...)) leaked the handle.
    with open(model_path, 'r') as fp:
        meta = json.load(fp)
    for m in meta['models']:
        if Setup.model in m['name']:
            self.classes = tuple(m['labels'])
            self.vocab = m['vocab']
            self.model = m['model']
            break
def run_input(stopEvent, timeout, debug=False):
    """Watch the VERA input directory until stopEvent is set.

    Polls stopEvent every `timeout` seconds; always stops and joins the
    observer thread before returning True.
    """
    # Renamed from `path`, which shadowed the module-level os.path alias
    # (the original had to spell out os.path.join for that reason).
    watch_path = os.path.join(Commons.getEnv(Config.vera), Setup.path_inp)
    event_handler = vera_handler.InputHandler(debug)
    observer = Observer()
    observer.schedule(event_handler, watch_path, recursive=False)
    observer.start()
    log.info('AI::VERA - running <%s> Watching path: [%s]', 'run_img', watch_path)
    try:
        # Event.wait returns True once the event is set; loop otherwise.
        while not stopEvent.wait(timeout):
            pass
    except KeyboardInterrupt:
        # BUGFIX: was a bare `except:` that swallowed every error and
        # stopped the observer a second time below.  Treat an interrupt
        # as a normal shutdown request; let real errors propagate.
        pass
    log.info('AI::VERA - stopping %s', 'run_img')
    if observer.is_alive():  # Thread.isAlive() was removed in Python 3.9
        observer.stop()
    observer.join()
    return True
def run_training(labs):
    """Train the NN classifier on all *-vera.ai training files and pickle
    the resulting model under the training directory.

    labs: ordered list of label names; each file is one-hot labelled by
    the first label whose name appears in its path.
    """
    vera = Commons.getEnv(Config.vera)
    train_path = path.join(vera, Setup.path_train)
    classifier = Classifier()
    classifier.setup_traing(train_path)
    input_X = []
    input_y = []
    for vera_file in glob(path.join(train_path, '*-vera.ai')):
        x = classifier.load_data(vera_file)
        labels = np.zeros(len(labs), dtype=int)
        for label in labs:
            if label in vera_file:
                labels[labs.index(label)] = 1
                break
        input_X.append(x)
        input_y.append(labels)

    conf = Config_NN()
    conf.debug = True
    conf.num_passes = 10000
    conf.num_samples = 10000
    conf.num_train = 8000
    conf.num_test = 2000
    conf.noise = 0.05
    in_sz = len(input_X[0])
    conf.nn_input_dim = in_sz      # input layer dimensionality
    # BUGFIX: was len(labels) - the leftover loop variable (NameError on an
    # empty training set).  The output layer size is the number of labels,
    # which is what len(labels) evaluated to when files existed.
    conf.nn_output_dim = len(labs)  # output layer dimensionality
    conf.nn_hidden_dim = in_sz * 2
    conf.reg = 0.00005
    conf.alpha = 0.3

    X = np.array(input_X)
    y = np.array(input_y)
    model = build_model(conf)
    model, lowest_J = train_model(model, X, y, conf)
    model_path = path.join(vera, Setup.path_train, Setup.name_model)
    # BUGFIX: context manager closes the pickle file - the bare
    # open(..., 'wb') handle was leaked.
    with open(model_path, 'wb') as fp:
        pickle.dump(model, fp)
def __init__(self, debug):
    """Set up pattern matching and resolve the raw/input working paths
    under the VERA root.
    """
    PatternMatchingEventHandler.__init__(self, patterns=self.patterns)
    self.debug = debug
    # Source and destination directories for incoming files.
    self.myPath = join(Commons.getEnv(Config.vera), Setup.path_raw)
    self.nextPath = join(Commons.getEnv(Config.vera), Setup.path_inp)
    log.info('AI::VERA - Created instance: %s', type(self).__name__)
def setup(self):
    """Resolve the VERA root and load the model and vocabulary from it."""
    vera_root = Commons.getEnv(Config.vera)
    self.model = self.load_model(vera_root)
    self.vocab = self.load_vocab(vera_root)
def setup_traing(self, path_to_traing_data):
    """Prepare for training: cache the VERA root and load the vocabulary.

    NOTE(review): the misspelled name 'setup_traing' is the public
    interface (run_training calls it by this spelling), so it is kept.
    The path_to_traing_data parameter is accepted but not read here,
    matching the original signature.
    """
    self.vera = Commons.getEnv(Config.vera)
    vocab_file = path.join(self.vera, Setup.path_train, Setup.name_vocab)
    self.vocab = self.load_vocab_from_path(vocab_file)