def main(indexer_token, indexer_URL, manifestPath, outputXesPath):
    theIndexer = indexer.IndexerClient(indexer_token, indexer_URL,
                                       headers={'User-Agent': '?'})
    extract(theIndexer, manifestPath, outputXesPath)
    sys.exit(0)
def plotBelos(files=None, leg=None):
    if files is None:
        files = ['./Picard.txt']
    for file_str in files:
        lin_iter = ex.extract(file_str, ex.BelosMaxItPattern)
        linatol = ex.extract(file_str, ex.BelosArTolPattern)
        print(linatol)
        pl.figure(4)
        if isinstance(lin_iter, float):
            # a single iteration count is returned as a plain float
            lin_iter = [lin_iter]
        pl.plot(range(1, len(lin_iter) + 1), lin_iter, marker='.')
        pl.xlabel('Picard iteration')
        pl.ylabel(r'linear iterations', ha='left', va='bottom', rotation=0)
        pl.gca().yaxis.set_label_coords(-0.08, 1.02)
        if leg is not None:
            pl.legend(leg, loc=0)
        pl.gca().get_xaxis().set_major_locator(pl.MaxNLocator(integer=True))
        pl.savefig('liniter.pdf', bbox_inches='tight')
        #
        pl.figure(5)
        pl.semilogy(range(1, len(linatol) + 1), linatol, marker='.')
        pl.xlabel('Picard iteration')
        pl.ylabel(r'achieved tolerance of the linear solver',
                  ha='left', va='bottom', rotation=0)
        pl.gca().yaxis.set_label_coords(-0.08, 1.02)
        # legend(leg, bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
        if leg is not None:
            pl.legend(leg, loc=0)
        pl.gca().get_xaxis().set_major_locator(pl.MaxNLocator(integer=True))
def plotNOX(paths=None, filename='output', run='', newton=False, save=False):
    """ plots residual ... over iterations (deprecated) """
    if paths is None:
        paths = ['./']
    for path in paths:
        iter_count = ex.extract(path + filename + str(run), ex.NOXIterPattern)
        res = ex.extract(path + filename + str(run), ex.NOXResPattern)
        # dof = ex.extract(path+filename+str(run), ex.PimpDofPattern)[0][0]
        # print('dof: ', dof)
        print(iter_count)
        print(res)
        pl.figure(1)
        # pl.semilogy(iter_count, res[:, 0]/pl.sqrt(dof))
        # pl.semilogy(res[:, 0]/pl.sqrt(dof), marker='.')
        pl.semilogy(res[:, 0], marker='.')
        if newton:
            pl.xlabel('Newton step')
        else:
            pl.xlabel('Picard iteration')
        pl.ylabel(r'$||\mathbf{r}||_2/\sqrt{N}$')
        # pl.ylabel(r'$||\mathbf{r}||_2/\sqrt{N}$', ha='left', va='bottom',
        #           rotation=0)
        # pl.gca().yaxis.set_label_coords(-0.08, 1.02)
        pl.gca().get_xaxis().set_major_locator(pl.MaxNLocator(integer=True))
        if save:
            pl.savefig('F.pdf', bbox_inches='tight')
        pl.figure(2)
        pl.semilogy(iter_count[1:], res[1:, 1], basey=2, marker='.')
        if newton:
            pl.xlabel('Newton step')
        else:
            pl.xlabel('Picard iteration')
        pl.ylabel(r'step width')
        # pl.ylabel(r'step width', ha='left', va='bottom', rotation=0)
        # pl.gca().yaxis.set_label_coords(-0.08, 1.02)
        pl.gca().get_xaxis().set_major_locator(pl.MaxNLocator(integer=True))
        if save:
            pl.savefig('lam.pdf', bbox_inches='tight')
        #
        pl.figure(3)
        # pl.semilogy(iter_count[1:], res[1:, 2]/pl.sqrt(dof), marker='.')
        pl.semilogy(iter_count[1:], res[1:, 2], marker='.')
        if newton:
            pl.xlabel('Newton step')
        else:
            pl.xlabel('Picard step')
        pl.ylabel(r'$||\delta\mathbf{q}||_2/\sqrt{N}$')
        pl.gca().yaxis.set_label_coords(-0.08, 1.02)
        pl.gca().get_xaxis().set_major_locator(pl.MaxNLocator(integer=True))
        if save:
            pl.savefig('du.pdf', bbox_inches='tight')
def main():
    if not os.path.exists(os.path.expanduser('~/.bpkg')):
        os.mkdir(os.path.expanduser('~/.bpkg'))
    if len(sys.argv) == 1:
        print('Error: No arguments present.')
        os.system('python bpkg.py -h')
        exit()
    # Change this each time the version changes
    if sys.argv[1] == '-v':
        print('bpkg application packaging utility version 1.0.0')
        exit()
    if sys.argv[1] == '-h':
        print('''
bpkg syntax - bpkg [-h|-v|start|bundle|extract|cleanup] foldername

bpkg is a tool used to bundle applications.

cleanup - Cleans up the bpkg cache.
start   - Starts the application.
bundle  - Bundles an application (hence the name).
extract - Extracts the contents of an application from its bundle.
''')
        exit()
    if sys.argv[1] != 'cleanup':
        if len(sys.argv) < 3:
            print('The directory of the project you want to package '
                  'should be the second argument')
            exit()
    if sys.argv[1] == 'start':
        start_app(sys.argv[2])
    elif sys.argv[1] == 'cleanup':
        for root, dirs, files in os.walk(os.path.expanduser('~/.bpkg')):
            for f in files:
                os.unlink(os.path.join(root, f))
            for d in dirs:
                shutil.rmtree(os.path.join(root, d))
    elif sys.argv[1] == 'bundle':
        bundle(sys.argv[2])
    elif sys.argv[1] == 'extract':
        extract(sys.argv[2])
    else:
        print('bundle, start, cleanup or extract should be present '
              'as the second argument')
        exit()
def read(self, fname):
    if os.path.exists(self.path + fname):
        return cPickle.load(open(self.path + fname, 'rb'))

    dt = {'images': [], 'landmarks': [], 'aus': [], 'subjects': [], 'sequences': []}

    # Get directory structure
    subjects = sorted([f for f in os.listdir(self.path_im)])
    sequences = [sorted(os.listdir(self.path_im + sub + '/')) for sub in subjects]

    print('###### READING PAIN DATASET ######')
    for subject, subject_sequences in zip(subjects, sequences):
        for sequence in subject_sequences:
            print('Subject:{}, Sequence:{}'.format(subject, sequence))
            rpath = subject + '/' + sequence + '/'
            im_seq = read_folder(self.path_im + rpath)
            lm_seq = np.asarray(read_folder(self.path_lm + rpath),
                                dtype=np.float16)[:, :, ::-1]

            # Extract face and resize
            S = map(list, zip(*[extract(i, l, 1, 30) for i, l in zip(im_seq, lm_seq)]))

            dt['images'].append(S[0])
            dt['landmarks'].append(S[1])
            dt['aus'].append(read_folder(self.path_au + rpath))
            dt['subjects'].append(subject)
            dt['sequences'].append(sequence)

    cPickle.dump(dt, open(self.path + fname, 'wb'), cPickle.HIGHEST_PROTOCOL)
    return dt
def save_extractor_case(doi, is_biorxiv):
    r = extract(doi, is_biorxiv, False, False)
    r['date'] = r['date'].strftime('%Y-%m-%d')
    r['image_dir'] = os.listdir(r['image_dir'])
    open('test/extract_test_cases/{}.json'.format(doi.replace('/', '_')),
         'w', encoding='utf-8').write(json.dumps(r))
def load_text(file_name):
    '''
    Loads texts from a specific folder on the file system.

    Parameters
    ----------
    file_name : str
        The name of the folder containing the texts.

    Returns
    -------
    total_text : list
        List of raw texts from each document.
    file_names : list
        List of file names.
    '''
    total_text = []
    file_names = []
    path = 'uploads/extracted/' + str(file_name)
    for filename in os.listdir(path):
        text, tokens, keywords = extract(os.path.join(path, filename))
        total_text.append(text)
        file_names.append(filename)
    return total_text, file_names
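# Hedged usage sketch (not part of the original source): the folder name
# 'sample_batch' is hypothetical; load_text() only needs a folder that exists
# under uploads/extracted/ and files that extract() can parse.
if __name__ == '__main__':
    texts, names = load_text('sample_batch')
    for name, text in zip(names, texts):
        print(name, len(text))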
def make_page_features(pages: np.ndarray) -> pd.DataFrame:
    """
    Calculates page features (site, country, agent, etc) from urls
    :param pages: Source urls
    :return: DataFrame with features as columns and urls as index
    """
    tagged = extractor.extract(pages).set_index('page')
    # Drop useless features (why is the title/'term' column dropped as well?)
    features: pd.DataFrame = tagged.drop(['term', 'marker'], axis=1)
    # todo
    # tagged   has columns [agent, site, country, term, marker] indexed by page
    #          (e.g. 145036 rows x 5 columns); 'marker' is mostly NaN.
    # features keeps only [agent, site, country] (e.g. 145036 rows x 3 columns).
    # print(f"tagged={tagged};{tagged.shape}; features={features};{features.shape}")
    return features
def polt_speedup(paths, nps, lab=None, runs=None):
    """ plots speedup """
    if lab is None:
        lab = []
    if runs is None:
        runs = ['']
    time = []
    for path in paths:
        temptime = 1e99
        for run in runs:
            tempnew = ex.extract(path + 'output' + str(run),
                                 ex.PimpSolveTimePattern, isarray=False)
            print('tempnew: ', tempnew)
            temptime = min(temptime, tempnew)
        time.append(temptime)
    # print('nps: ', nps)
    print('time: ', time)
    if not lab:
        pl.plot(nps, time[0] / pl.array(time), '.-', ms=5)
    else:
        pl.plot(nps, time[0] / pl.array(time), '.-', ms=5, label=lab)
    pl.plot(nps, pl.array(nps) / pl.array(nps[0]), ':', lw=2)
    pl.ylim(ymin=1)
    pl.gca().xaxis.set_ticks(nps)
    pl.xlabel('number of cores')
    pl.ylabel('speed-up', ha='left', va='bottom', rotation=0)
    pl.gca().yaxis.set_label_coords(-0.08, 1.02)
def analyze(ref=''):
    """ analyze """
    #
    files = [
        'Picard' + ref + '.txt',
        'MHDtConvectionDiffusion' + ref + '.txt',
        'DivGrad' + ref + '.txt',
        'ModeNonlinearOp_ConvectionDiffusionVOp' + ref + '.txt',
        'ConvectionDiffusionVOp' + ref + '.txt'
    ]
    #
    for fil in files:
        stats = ex.extract(fil, ex.BelosIterPattern)
        count = len(stats[:, 0])
        print(fil, ' (', count, ')')
        if count != 0:
            print('total: ', sum(stats[:, 0]))
            print('iter: ', np.percentile(stats[:, 0], 10),
                  np.percentile(stats[:, 0], 50),
                  np.percentile(stats[:, 0], 90))
            print('iter: ', np.mean(stats[:, 0]), '+- ', np.std(stats[:, 0]))
            print('max: ', np.max(stats[:, 0]))
            print('tol: ', np.percentile(stats[:, 1], 10),
                  np.percentile(stats[:, 1], 50),
                  np.percentile(stats[:, 1], 90))
            print('tol: ', np.mean(stats[:, 1]), '+- ', np.std(stats[:, 1]))
        print()
def synchronize():
    arr = extract()
    plantgoed = arr[0]
    verkoopbare = arr[1]
    coniferen = arr[2]
    db = MySQLdb.connect(host="188.241.112.113", user="******",
                         passwd="brecht123", db="kapelhof")
    cursor = db.cursor()
    # clear the table:
    cursor.execute("TRUNCATE TABLE voorraad2;")
    # plantgoed:
    for i in xrange(len(plantgoed)):
        cursor.execute(
            "INSERT INTO voorraad2(Beschrijving, Prijs, Voorradig, Plantgoed, Id) "
            "VALUES(%s, %s, %s, %s, %s)",
            (plantgoed[i]['beschrijving'], plantgoed[i]['prijs'],
             plantgoed[i]['aantal'], 1, plantgoed[i]['id']))
    # verkoopbare:
    for i in xrange(len(verkoopbare)):
        cursor.execute(
            "INSERT INTO voorraad2(Beschrijving, Prijs, Voorradig, Plantgoed, Id) "
            "VALUES(%s, %s, %s, %s, %s)",
            (verkoopbare[i]['beschrijving'], verkoopbare[i]['prijs'],
             verkoopbare[i]['aantal'], 0, verkoopbare[i]['id']))
    # coniferen:
    for i in xrange(len(coniferen)):
        cursor.execute(
            "INSERT INTO voorraad2(Beschrijving, Prijs, Voorradig, Plantgoed, Id) "
            "VALUES(%s, %s, %s, %s, %s)",
            (coniferen[i]['beschrijving'], coniferen[i]['prijs'],
             coniferen[i]['aantal'], 0, coniferen[i]['id']))
    db.commit()
    return True
def order_phrases(extraction_file):
    phrases = extract(extraction_file)
    characters = {}
    dialogues = []
    for phrase in phrases:
        if phrase.startswith("SCENE") or phrase.startswith("ACT") or "END" in phrase:
            continue
        character_name = get_character_name(phrase)
        if character_name is not None:
            character_name = character_name.title()
            character_phrase = nltk.sent_tokenize(phrase)[1:]
            if character_name not in characters:
                gender = input("Which gender is %s? (male/female): " % character_name)
                characters[character_name] = gender
            dialogues.append((character_name, character_phrase))
    return characters, dialogues
def handle_compressed_file(file_path, compressed_file_name):
    myid = session['myid']
    decompress(file_path, compressed_file_name)
    compressed_file_name_without_extension = compressed_file_name.split('.')[0]
    compressed_file_name_without_extension_uuid = (
        compressed_file_name_without_extension + "_" + str(myid))
    compressed_file_name_uuid = compressed_file_name + "_" + str(myid)
    session['compressed_file_name'] = compressed_file_name
    session['compressed_file_name_without_extension'] = \
        compressed_file_name_without_extension
    session['compressed_file_name_without_extension_uuid'] = \
        compressed_file_name_without_extension_uuid
    session['compressed_file_name_uuid'] = compressed_file_name_uuid
    totalvocab_stemmed = []
    totalvocab_tokenized = []
    total_text = []
    file_names = []
    for filename in os.listdir('uploads/extracted/'
                               + str(compressed_file_name_without_extension_uuid)):
        mypath = 'uploads/extracted/' + str(compressed_file_name_without_extension_uuid)
        text, tokens, keywords = extract(os.path.join(mypath, filename))
        totalvocab_stemmed.extend(stem(tokens))
        totalvocab_tokenized.extend(tokens)
        total_text.append(text)
        file_names.append(filename)
    return total_text, totalvocab_stemmed, totalvocab_tokenized, file_names
def setUpClass(self):
    if os.path.exists('temp'):
        shutil.rmtree('temp')
    os.mkdir('temp')
    os.mkdir('temp/discussion')
    os.mkdir('temp/methods')
    os.mkdir('temp/all_text')
    os.mkdir('temp/images')
    from extractor import extract
    self.responses = {
        '10.1101/2020.11.25.20238915': self.normalize_json(
            extract('10.1101/2020.11.25.20238915', False, False, False)),
        '10.1101/2020.11.10.374587': self.normalize_json(
            extract('10.1101/2020.11.10.374587', True, False, False)),
        '10.1101/2020.11.24.20238287': self.normalize_json(
            extract('10.1101/2020.11.24.20238287', False, False, False)),
    }
    self.test_cases = {
        '10.1101/2020.11.25.20238915': json.loads(open(
            'test/extract_test_cases/10.1101_2020.11.25.20238915.json',
            'r', encoding='utf-8').read()),
        '10.1101/2020.11.10.374587': json.loads(open(
            'test/extract_test_cases/10.1101_2020.11.10.374587.json',
            'r', encoding='utf-8').read()),
        '10.1101/2020.11.24.20238287': json.loads(open(
            'test/extract_test_cases/10.1101_2020.11.24.20238287.json',
            'r', encoding='utf-8').read()),
    }
def prepare_patches(self, pose, out_fname):
    markers = {
        'leye': np.concatenate((np.arange(17, 22), np.arange(36, 42))),
        'nose': np.arange(27, 36),
        'mouth': np.arange(48, 68),
        'beye': np.asarray([21, 22, 42, 28, 39]),
        'lmouth': np.asarray([36, 39, 31, 48]),
    }
    print 'Prepare patches for database {}'.format(out_fname)
    with h5py.File(self.path_out + out_fname, 'r+') as hf:
        for subject_k, subject_v in hf[pose + '/'].items():
            for segment_k, segment_v in hf[pose + '/' + subject_k + '/'].items():
                print '{} of dataset {}'.format(segment_k, pose + '/' + subject_k)
                faces, lms = segment_v['faces'], segment_v['lms']
                patches = {'leye': [], 'beye': [], 'mouth': [], 'lmouth': [], 'nose': []}
                for i, (face, lm) in enumerate(zip(faces, lms)):
                    # Extract patches
                    for k, v in markers.items():
                        if np.sum(lm) == 0:
                            print 'There were no valid landmarks for this sample'
                            patch = np.zeros((56, 56, 3))
                        else:
                            patch = extract(face, square_bbox(lm[v]),
                                            extension=1.3, size=56)[0]
                        patches[k].append(patch)
                for k, v in markers.items():
                    '''
                    print k
                    print np.asarray(patches[k]).shape
                    '''
                    segment_v.create_dataset(k, data=np.asarray(patches[k]))
def run():
    text = ("cagrUFSC2calendar turns your course timetable into a calendar format (.ics)\n"
            "By default, the recurrence of each course event is tied to the end date of the\n"
            "UFSC undergraduate semester. You can instead set a custom number of repetitions\n"
            "with --repeat NUM or an end date with --end Y-m-d")
    parser = argparse.ArgumentParser(
        description=text, formatter_class=argparse.RawTextHelpFormatter)
    parser.add_argument("file", help="timetable in HTML")
    parser.add_argument("output", help="name of the output file")
    parser.add_argument("--repeat", help="number of repetitions of the events")
    parser.add_argument("--end", help="end date used to compute repetitions")
    args = parser.parse_args()

    FILE = args.file
    OUTPUT = args.output
    REPEAT = args.repeat
    END_DATE = args.end
    if not END_DATE:
        END_DATE = "2020-7-14"

    print("Starting to extract information from", FILE, "...")
    daily_events, day2abrev, day2key, code2name = extract(FILE)
    print("Building calendar...")
    build(OUTPUT, END_DATE, REPEAT, daily_events, day2abrev, day2key, code2name)
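# Hedged usage sketch (not from the original source): the script and file
# names below are hypothetical; run() parses them via argparse, so an
# invocation would look roughly like:
#
#   python cagr2calendar.py grade.html semestre.ics --repeat 18
#   python cagr2calendar.py grade.html semestre.ics --end 2020-12-15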
def get_times(paths, runs, pattern):
    """ extracts times """
    time_min = []
    time_mean = []
    time_std = []
    fails = []
    for path in paths:
        temptime = []
        fails.append(0.)
        for run in runs:
            tempnew = ex.extract(path + 'output' + str(run), pattern, isarray=False)
            if isinstance(tempnew, pl.ndarray):
                if tempnew.size:
                    temptime.append(tempnew[0])
                else:
                    fails[-1] += 1.
            else:
                temptime.append(tempnew)
        time_min.append(min(temptime))
        time_mean.append(pl.mean(temptime))
        time_std.append(pl.std(temptime))
        fails[-1] /= len(runs)
    return time_min, fails, time_mean, time_std
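# Hedged usage sketch (not from the original source): the run directories and
# the ex.PimpSolveTimePattern regex are assumptions carried over from the other
# helpers here; get_times() returns per-path minima, failure rates, means and
# standard deviations over the repeated runs.
if __name__ == '__main__':
    t_min, fail_rate, t_mean, t_std = get_times(
        ['./np01/', './np02/', './np04/'],   # hypothetical run directories
        runs=range(3),
        pattern=ex.PimpSolveTimePattern)
    print(t_min, fail_rate, t_mean, t_std)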
def digdeep(path='./', prefix='xv', refs=1, color=COLORS[0], linestyle=LINES[0]):
    """ analyze deep """
    offset = 0
    for ref in range(max(refs, 1)):
        if refs == 0:
            ref = 0
            iters = int(ex.extract(path + 'nonlinear.txt',
                                   ex.NOXIterPattern)[-1][0]) + 1
        else:
            iters = int(ex.extract(path + 'nonlinear' + str(ref) + '.txt',
                                   ex.NOXIterPattern)[-1][0]) + 1
        print(prefix)
        print('iters:', iters)
        n_modes = 0
        bla = pl.loadtxt(path + prefix + '_' + str(ref) + '_' + str(1) + '.txt')
        if bla.ndim == 1:
            n_modes = 1
        else:
            n_modes = bla.shape[0]
        print('#modes:', n_modes)
        print(bla)
        norms = pl.zeros([iters, n_modes])
        for i in range(1, iters):
            print(pl.loadtxt(path + prefix + '_' + str(ref) + '_' + str(i) + '.txt'))
            if n_modes == 1:
                norms[i, :] = \
                    pl.loadtxt(path + prefix + '_' + str(ref) + '_' + str(i) + '.txt')[-1]
            else:
                norms[i, :] = pl.loadtxt(path + prefix + '_' + str(ref) + '_'
                                         + str(i) + '.txt')[:, -1]
        for j in range(n_modes):
            pl.semilogy(pl.arange(1, iters) + offset, norms[1:, j], color=color,
                        linestyle=linestyle, marker=MARKERS[j])
        offset += iters - 1
        print()
def main():
    import urllib
    from extractor import extract, extract_img

    # japanese site
    try:
        uri = 'http://www.gizmodo.jp/2012/09/post_10869.html'
        print uri, '\n'
        print extract(urllib.urlopen(uri).read())['body'], '\n'
        print extract_img(urllib.urlopen(uri).read()), '\n'
    except:
        print sys.exc_info()[0], sys.exc_info()[1]

    try:
        uri = 'http://labs.cybozu.co.jp/blog/nakatani/2007/09/web_1.html'
        print uri, '\n'
        print extract(urllib.urlopen(uri).read())['body'], '\n'
        print extract_img(urllib.urlopen(uri).read()), '\n'
    except:
        print sys.exc_info()[0], sys.exc_info()[1]

    # english site
    try:
        uri = 'http://capturevision.wordpress.com/2009/08/05/music-visualizer-progress/'
        print uri, '\n'
        print extract(urllib.urlopen(uri).read())['body'], '\n'
        print extract_img(urllib.urlopen(uri).read()), '\n'
    except:
        print sys.exc_info()[0], sys.exc_info()[1]

    try:
        uri = 'http://www.dasprinzip.com/prinzipiell/2012/04/03/shaders-in-their-natural-habitat-episode-1/'
        print uri, '\n'
        print extract(urllib.urlopen(uri).read())['body'], '\n'
        print extract_img(urllib.urlopen(uri).read()), '\n'
    except:
        print sys.exc_info()[0], sys.exc_info()[1]

    try:
        uri = 'http://www.generatorx.no/20101217/abstrakt-abstrakt-jorinde-voigt/?utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+generatorx+%28Generator.x%3A+Generative+strategies+in+art+%26+design%29'
        print uri, '\n'
        print extract(urllib.urlopen(uri).read())['body'], '\n'
        print extract_img(urllib.urlopen(uri).read()), '\n'
    except:
        print sys.exc_info()[0], sys.exc_info()[1]
def extract_url():
    url = request.args.get('url', '')
    if not URL_REGEX.match(url):
        return jsonify({'type': 'error', 'message': 'Invalid URL'}), 406
    return jsonify(type='success', message=extract(url))
def handle_compressed_file(file_path, compressed_file_name):
    '''
    Handles decompressing a file and processing (tokenizing, parsing, etc.)
    its contents.

    Parameters
    ----------
    file_path : str
        Path of the compressed file on the file system.
    compressed_file_name : str
        Name of the compressed file.

    Returns
    -------
    total_text : list
        List of raw texts from each document.
    totalvocab_stemmed : list
        List of stemmed tokens.
    totalvocab_tokenized : list
        List of tokens.
    file_names : list
        List of file names.
    '''
    myid = session['myid']
    decompress(file_path, compressed_file_name)
    compressed_file_name_without_extension = compressed_file_name.split('.')[0]
    compressed_file_name_without_extension_uuid = (
        compressed_file_name_without_extension + "_" + str(myid))
    compressed_file_name_uuid = compressed_file_name + "_" + str(myid)
    session['compressed_file_name'] = compressed_file_name
    session['compressed_file_name_without_extension'] = \
        compressed_file_name_without_extension
    session['compressed_file_name_without_extension_uuid'] = \
        compressed_file_name_without_extension_uuid
    session['compressed_file_name_uuid'] = compressed_file_name_uuid
    totalvocab_stemmed = []
    totalvocab_tokenized = []
    total_text = []
    file_names = []
    for filename in os.listdir('uploads/extracted/'
                               + str(compressed_file_name_without_extension_uuid)):
        mypath = 'uploads/extracted/' + str(compressed_file_name_without_extension_uuid)
        text, tokens, keywords = extract(os.path.join(mypath, filename))
        totalvocab_stemmed.extend(stem(tokens))
        totalvocab_tokenized.extend(tokens)
        total_text.append(text)
        file_names.append(filename)
    return total_text, totalvocab_stemmed, totalvocab_tokenized, file_names
def test_extract(self):
    a = [generate_code() for _ in range(5)]
    ans = _("The natures of {} is {} and you returned {}.")
    for e in a:
        stu_ans = extractor.extract(e)
        corr_ans = corr.extract(e)
        self.assertEqual(equal_string(corr_ans),
                         equal_string(strip_trailing_space(stu_ans)),
                         ans.format(e, corr_ans, stu_ans))
def make_page_features(pages: np.ndarray) -> pd.DataFrame:
    """
    Calculates page features (site, country, agent, etc) from urls
    :param pages: Source urls
    :return: DataFrame with features as columns and urls as index
    """
    tagged = extractor.extract(pages).set_index('page')
    # Drop useless features
    features: pd.DataFrame = tagged.drop(['term', 'marker'], axis=1)
    return features
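# Hedged usage sketch (not from the original source): the sample page keys
# follow the Wikipedia web-traffic naming used elsewhere in these snippets;
# what extractor.extract() returns per page depends on that module, so this
# only illustrates the expected call shape and the resulting index/columns.
if __name__ == '__main__':
    sample_pages = np.array([
        '!vote_en.wikipedia.org_all-access_all-agents',
        '[Alexandros]_ja.wikipedia.org_desktop_all-agents',
    ])
    feats = make_page_features(sample_pages)
    print(feats.columns.tolist())   # expected: ['agent', 'site', 'country']
    print(feats.index.name)         # 'page'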
def run(self):
    if not self.text:
        logging.warn("Text is EMPTY!")
        return
    itemDataSet = extract(self.text, config.getItemRegexGroup(), config.getFieldMeta())
    if self.outputHandler:
        self.outputHandler.output(itemDataSet, config.getFieldMeta())
    else:
        logging.warn("OutputHandler not found.")
def plot_linear(file_str='./Picard.txt', label=None, save=False, fig=1, offset=0,
                linestyle='-'):
    """ plots the linear iteration and achieved tolerance """
    lin_iter = ex.extract(file_str, ex.BelosMaxItPattern)
    linatol = ex.extract(file_str, ex.BelosArTolPattern)
    print(linatol)
    pl.figure(fig)
    if isinstance(lin_iter, float):
        # a single iteration count is returned as a plain float
        lin_iter = [lin_iter]
    pl.plot(pl.arange(1, len(lin_iter) + 1) + offset, lin_iter, marker='.',
            label=label, linestyle=linestyle)
    pl.xlabel('Picard step')
    pl.ylabel(r'linear iteration steps', ha='left', va='bottom', rotation=0)
    pl.gca().yaxis.set_label_coords(-0.08, 1.02)
    pl.gca().get_yaxis().set_major_locator(pl.MaxNLocator(integer=True))
    if save:
        pl.savefig('liniter.pdf', bbox_inches='tight')
    #
    pl.figure(fig + 1)
    pl.semilogy(pl.arange(1, len(linatol) + 1) + offset, linatol, marker='.',
                label=label, linestyle=linestyle)
    pl.xlabel('Picard step')
    # pl.ylabel(r'achieved tolerance of the linear solver', ha='left',
    #           va='bottom', rotation=0)
    # pl.gca().yaxis.set_label_coords(-0.08, 1.02)
    pl.ylabel(r'achieved tolerance of the linear solver')
    pl.gca().get_xaxis().set_major_locator(pl.MaxNLocator(integer=True))
    if save:
        pl.savefig('lintol.pdf', bbox_inches='tight')
    offset += len(linatol)
    return offset
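# Hedged usage sketch (not from the original source): the solver log file
# names are hypothetical; plot_linear() returns the updated offset, so
# successive restart logs can be appended to the same figure.
if __name__ == '__main__':
    off = 0
    for log in ['./Picard.txt', './Picard_restart.txt']:
        off = plot_linear(log, label=log, fig=1, offset=off)
    pl.legend(loc=0)
    pl.show()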
def process(self):
    subtitleTemp = []
    for sub in self.subtitles:
        if type(sub) is dict:
            for key, value in sub.items():
                for index, val in enumerate(value):
                    subtitleTemp.append(val)
    self.subtitleList = extractor.extract(subtitleTemp)
def extract_url():
    url = request.args.get('url', '')
    err = "enter a valid URL"
    if not URL_REGEX.match(url):
        return render_template('index.html', error='"{}"'.format(err))
    message = extract(url)
    return render_template('index.html',
                           headline='"{}"'.format(message[0]),
                           title_out='"{}"'.format(message[2]),
                           news='"{}"'.format(message[1]),
                           text_out='"{}"'.format(message[3]))
def test_extract(self):
    test_uri = (
        "s3://ele-gv-o2/16694214/SFN00000/HRTE5/37N070E/373300N0701200E_20121101000000_BKEYE_100_QCS_00_0_UFO.tif"
    )
    extractor = Generic.GdalExtractor(test_uri)
    resp = extractor.extract()
    self.assertIsNotNone(resp)
    print(json.dumps(resp, sort_keys=True, indent=4, separators=(',', ': ')))
def get_fresh_userdata():
    try:
        tarball = "http://xbmc.svn.sourceforge.net/viewvc/xbmc/branches/xbox/userdata.tar.gz?view=tar"
        targz = os.path.join(PLATFORM_DIR, "userdata.tar.gz")
        fp, h = urllib.urlretrieve(tarball, targz)
        from extractor import extract
        OK = extract(fp, PLATFORM_DIR)[1]
        os.remove(targz)
        del extract
    except:
        print_exc()
def process_examples_folder(examples_folder):
    for class_num in classes.keys():
        class_path = os.path.join(examples_folder, class_num)
        for example in os.listdir(class_path):
            example_path = os.path.join(class_path, example)
            if os.path.isfile(example_path):
                continue
            count = int(open(os.path.join(example_path, 'count.txt')).read())
            for frame_number in frame_numbers_for_count(count):
                frame_path = os.path.join(example_path, "frame-%04d.jpg" % frame_number)
                frame_features = extractor.extract(frame_path)
                for feature in feature_names:
                    sys.stdout.write('%.15f,' % frame_features[feature])
                sys.stdout.write('%s\n' % class_num)
def build(self):
    Window.clearcolor = (0, 0, 0, 1)
    Window.fullscreen = 'auto'
    Window.bind(on_key_down=self.press)
    # create data dictionary
    self.info = extract('telemetry2.xlsx')
    self.main.info = self.info
    self.diagnostics.info = self.info
    self.diagnostics.tabs.info = self.info
    # Clock.schedule_interval(self.readserial, 0.05)
    Clock.schedule_interval(self.noserial, 0.5)
    return self.sm
def extractItem(self, msgFunc=None, progressBar=None):
    """
    Extract item in temp location
    Update:
        temp_item_path
        install_path
        name
    """
    # TODO: update a progress bar during extraction
    status = "OK"  # Status of download: [OK | ERROR | CANCELED]
    percent = 33
    # Check if the archive exists
    xbmc.log("extractItem", xbmc.LOGDEBUG)
    if (os.path.exists(self.itemInfo["raw_item_path"])
            and TYPE_SYSTEM_ARCHIVE == self.itemInfo["raw_item_sys_type"]):
        if progressBar != None:
            progressBar.update(percent, "Extraction:", (self.itemInfo["name"]))
        import extractor
        process_error = False
        # Extraction in cache directory (if OK copy later on to the correct location)
        file_path, OK = extractor.extract(self.itemInfo["raw_item_path"],
                                          destination=self.CACHEDIR, report=True)
        xbmc.log("extractItem - file_path: %s" % file_path, xbmc.LOGDEBUG)
        if file_path == "":
            installError = _(30139) % os.path.basename(self.itemInfo["raw_item_path"])
            xbmc.log("ArchItemInstaller - extractItem: Error during the extraction of %s"
                     " - impossible to extract the name of the directory "
                     % os.path.basename(self.itemInfo["raw_item_path"]),
                     xbmc.LOGNOTICE)
            status = "ERROR"
        else:
            # Extraction successful
            self.itemInfo["temp_item_path"] = file_path
        del extractor
        percent = 100
        if progressBar != None:
            progressBar.update(percent, _(30182), self.itemInfo["name"])
    else:
        xbmc.log("extractItem - Archive does not exist - extraction impossible",
                 xbmc.LOGNOTICE)
        status = "ERROR"
    return status
def find_stars(self, catalogue_name=None, cattype='imcore', extension=1):
    """
    Find stars on image.

    Parameters
    ----------
    catalogue_name : string, optional
        file name of extracted catalogue
    cattype : string, optional
        type of catalogue (imcore|sextractor|python)
    extension : integer, optional
        catalogue extension to read
    """
    self.cattype = cattype
    if self.cattype == 'imcore':
        logger.info('Using imcore object extractor')
        if not catalogue_name:
            catalogue_name = self.image_name.replace('.fit', '_cat.fit')
        if not os.access(catalogue_name, os.R_OK):
            imcore(self.image_name, confmap='auto')
        cat = fits.open(catalogue_name)
        xcat = cat[extension].data.field('x_coordinate')
        ycat = cat[extension].data.field('y_coordinate')
        # We read in classification and ellipticity
        # and select only point-like objects
        classification = cat[extension].data.field('classification')
        ellipticity = cat[extension].data.field('ellipticity')
        mask = (classification == -1) & (ellipticity < 0.1)
        xcat, ycat = xcat[mask], ycat[mask]
        cat.close()
    elif self.cattype == 'sextractor':
        logger.error('Sextractor catalogue type not yet implemented')
        xcat = ycat = 0.0
    elif self.cattype == 'python':
        logger.info('Using Python object extractor')
        import extractor
        t = extractor.extract(self.fh[extension].data)
        xcat, ycat = t['x'], t['y']
    return (xcat, ycat)
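# Hedged usage sketch (not from the original source): the owning image class
# is not shown here, so the instance name `img` below is hypothetical; the
# call returns masked x/y pixel coordinates of point-like sources.
#
#   x, y = img.find_stars(cattype='imcore', extension=1)
#   print(len(x), 'point-like objects kept after the ellipticity cut')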
def main():
    input_filename = 'data/input00.txt'
    output_filename = 'data/output00.txt'
    (train_features, train_labels,
     test_features, test_labels) = ex.extract(input_filename, output_filename)

    classifiers = {
        "NB Multinomial": MultinomialNB(),
        "NB Gaussian": GaussianNB(),
        "Logistic Regression": LogisticRegression(C=1e5, tol=0.001, fit_intercept=True),
        "Decision Tree": DecisionTreeClassifier(min_samples_split=1, random_state=0),
        "KNN": KNeighborsClassifier(n_neighbors=3),
        "SVM": SVC(gamma=2, C=1),
        "LDA": LDA(),
        "QDA": QDA(reg_param=0.5),
        "Random Forest": RandomForestClassifier(n_estimators=200),
        "AdaBoost": AdaBoostClassifier(n_estimators=200),
    }

    print "-" * 80, "\n", "Raw Dataset", "\n", "-" * 80
    for name, classifier in classifiers.iteritems():
        clf = classifier.fit(train_features, train_labels)
        print name, clf.score(test_features, test_labels)

    print "-" * 80, "\n", "Scaled Feature Dataset", "\n", "-" * 80
    for name, classifier in classifiers.iteritems():
        (new_features, new_lables) = scaled_features(train_features, train_labels)
        clf = classifier.fit(new_features, new_lables)
        (new_test_features, new_test_lables) = scaled_features(train_features, train_labels)
        print name, clf.score(new_test_features, new_test_lables)

    print "-" * 80, "\n", "Lasso Feature Selection", "\n", "-" * 80
    for name, classifier in classifiers.iteritems():
        (new_features, new_lables) = extract_features(
            extract_lasso_features_indexes(train_features, train_labels),
            train_features, train_labels)
        clf = classifier.fit(new_features, new_lables)
        (new_test_features, new_test_lables) = extract_features(
            extract_lasso_features_indexes(train_features, train_labels),
            test_features, test_labels)
        print name, clf.score(new_test_features, new_test_lables)

    print "-" * 80, "\n", "Linear Feature Selection", "\n", "-" * 80
    for name, classifier in classifiers.iteritems():
        (new_features, new_lables) = extract_features(
            extract_linear_features_indexes(train_features, train_labels),
            train_features, train_labels)
        clf = classifier.fit(new_features, new_lables)
        (new_test_features, new_test_lables) = extract_features(
            extract_linear_features_indexes(train_features, train_labels),
            test_features, test_labels)
        print name, clf.score(new_test_features, new_test_lables)
def extractItem(self, msgFunc=None, progressBar=None):
    """ Extract item in temp location """
    # TODO: update a progress bar during extraction
    print "extractItem - path"
    print self.downloadArchivePath
    status = "OK"  # Status of download: [OK | ERROR | CANCELED]
    percent = 33
    # Check if the archive exists
    if os.path.exists(self.downloadArchivePath):
        if progressBar != None:
            progressBar.update(percent, "Extraction:", (self.name))
        # if not self.downloadArchivePath.endswith('zip') and not self.downloadArchivePath.endswith('rar'):
        #     # Unknown extension, let's try a few well-known types:
        #     # zip
        #     self.downloadArchivePath = self.downloadArchivePath + ".zip"
        if self.downloadArchivePath.endswith('zip') or self.downloadArchivePath.endswith('rar'):
            import extractor
            process_error = False
            # Extraction in cache directory (if OK copy later on to the correct location)
            file_path, OK = extractor.extract(self.downloadArchivePath, report=True)
            if Item.TYPE_SCRAPER + "_" in self.type:
                # Scrapers
                # ----------------
                if (OK == bool(file_path)) and os.path.exists(file_path):
                    # Extraction successful
                    self.destinationPath = self.typeInstallPath
                    self.extractedDirPath = file_path
                    # Get scraper file's name
                    try:
                        self.scraperFileList = os.listdir(str(self.extractedDirPath))
                        self.installName = os.path.splitext(self.scraperFileList[0])[0]
                        print "self.scraperFileList"
                        print self.scraperFileList
                    except Exception, e:
                        print "ArchItemInstaller: Exception in extractItem while listing scraper files: %s" % self.extractedDirPath
                        print_exc()
                else:
                    status = "ERROR"
            else:
                # Scripts and plugins case
                # ------------------------
                # Get the name of the directory inside the archive:
                dirName = ""
                if (OK == bool(file_path)) and os.path.exists(file_path):
                    dirName = os.path.basename(file_path)
                if dirName == "":
                    installError = _(139) % os.path.basename(self.downloadArchivePath)
                    print "Error during the extraction of %s - impossible to extract the name of the directory " % os.path.basename(self.downloadArchivePath)
                    status = "ERROR"
                else:
                    # Extraction successful
                    self.destinationPath = os.path.join(self.typeInstallPath,
                                                        os.path.basename(file_path))
                    self.installName = os.path.basename(file_path)
                    self.extractedDirPath = file_path
                    print self.destinationPath
            # TODO: add skin case (requirements need to be defined first)
            del extractor
            # print self.type
            # print self.destinationPath
            # print self.extractedDirPath
        percent = 100
        if progressBar != None:
            progressBar.update(percent, _(182), self.name)
def main():
    for testcase in testcases:
        print '[%s] %s %s' % (datetime.today().strftime('%Y-%m-%d %H:%M:%S'),
                              '\033[35m' + 'testing' + '\033[0m', testcase['uri'])
        evaluate(testcase, extract(urllib.urlopen(testcase['uri']).read()))
def createExcel():
    [plantgoed, verkoopbare, coniferen, bomen] = extract()

    font0 = Font()
    font0.name = 'Arial'
    font0.colour_index = 0
    font0.bold = False
    font0.height = 10 * 20

    text = XFStyle()
    text.font = font0

    prijs = XFStyle()
    prijs.font = font0
    prijs.num_format_str = '"$"#,##0.00_);("$"#,##'

    ver = open_workbook('extractor/verk.xls', formatting_info=True)
    plg = open_workbook('extractor/plg.xls', formatting_info=True)
    all = open_workbook('extractor/all.xls', formatting_info=True)

    verkoopwb = copy(ver)
    plgwb = copy(plg)
    allwb = copy(all)

    verkoopwb.add_style(text)
    verkoopwb.add_style(prijs)
    plgwb.add_style(text)
    plgwb.add_style(prijs)
    allwb.add_style(text)
    allwb.add_style(prijs)

    # all
    plg1 = allwb.get_sheet(0)
    verk1 = allwb.get_sheet(1)
    con1 = allwb.get_sheet(2)
    # verkoopbare
    verk2 = verkoopwb.get_sheet(0)
    con2 = verkoopwb.get_sheet(1)
    bomen2 = verkoopwb.get_sheet(2)
    # plgList
    plg2 = plgwb.get_sheet(0)

    plgc = 14
    verkc = 14
    conc = 14
    bomc = 14
    for i in xrange(len(plantgoed)):
        plg1.write(plgc, 0, plantgoed[i]['beschrijving'], text)
        plg1.write(plgc, 1, plantgoed[i]['aantal'], text)
        plg1.write(plgc, 2, plantgoed[i]['prijs'], prijs)
        plg2.write(plgc, 0, plantgoed[i]['beschrijving'], text)
        plg2.write(plgc, 1, plantgoed[i]['aantal'], text)
        plg2.write(plgc, 2, plantgoed[i]['prijs'], prijs)
        plgc += 1
    for i in xrange(len(verkoopbare)):
        verk1.write(verkc, 0, verkoopbare[i]['beschrijving'], text)
        verk1.write(verkc, 1, verkoopbare[i]['aantal'], text)
        verk1.write(verkc, 2, verkoopbare[i]['prijs'], prijs)
        verk2.write(verkc, 0, verkoopbare[i]['beschrijving'], text)
        verk2.write(verkc, 1, verkoopbare[i]['aantal'], text)
        verk2.write(verkc, 2, verkoopbare[i]['prijs'], prijs)
        verkc += 1
    for i in xrange(len(coniferen)):
        con1.write(conc, 0, coniferen[i]['beschrijving'], text)
        con1.write(conc, 1, coniferen[i]['aantal'], text)
        con1.write(conc, 2, coniferen[i]['prijs'], prijs)
        con2.write(conc, 0, coniferen[i]['beschrijving'], text)
        con2.write(conc, 1, coniferen[i]['aantal'], text)
        con2.write(conc, 2, coniferen[i]['prijs'], prijs)
        conc += 1
    for i in xrange(len(bomen)):
        bomen2.write(bomc, 0, bomen[i]['beschrijving'], text)
        bomen2.write(bomc, 1, bomen[i]['aantal'], text)
        bomen2.write(bomc, 2, bomen[i]['prijs'], prijs)
        bomc += 1

    year = datetime.datetime.now().year
    # insert kapelhof logo into each worksheet:
    plg1.insert_bitmap('extractor/kapelhof.bmp', 0, 1)
    plg2.insert_bitmap('extractor/kapelhof.bmp', 0, 1)
    verk1.insert_bitmap('extractor/kapelhof.bmp', 0, 1)
    verk2.insert_bitmap('extractor/kapelhof.bmp', 0, 1)
    con1.insert_bitmap('extractor/kapelhof.bmp', 0, 1)
    con2.insert_bitmap('extractor/kapelhof.bmp', 0, 1)
    bomen2.insert_bitmap('extractor/kapelhof.bmp', 0, 1)

    verkoopwb.save('voorraadslijst_verkoopbare_planten_' + str(year) + '-' + str(year + 1) + '.xls')
    plgwb.save('voorraadslijst_plantgoed_' + str(year) + '-' + str(year + 1) + '.xls')
    allwb.save('voorraadslijst_' + str(year) + '-' + str(year + 1) + '.xls')
    return True
#!/usr/bin/env python
import extractor
from extractor import Keypoint
import numpy as np

"""
This is the extractor test file; it creates a few keypoints.
"""

kps = [Keypoint(1.0, 2.0, 3.0), Keypoint(4.0, 5.0, 6.0), Keypoint(7.0, 8.0, 9.0)]
output = extractor.extract(kps)
print "Output: %s" % (str(output))
print ""


def do():
    """ this function does stuff """
    for kp in kps:
        print kp


do()
# download
dret = None
if len(files) == 0:
    downloader.download(conns, downloadPath, pars, False, guiInfo)
else:
    dret = downloader.download(conns, downloadPath, files, False, guiInfo)

# verify / extract
vret = None
if not len(files) == 0:
    if cfg.get("automation", "verify") == "true":
        vret = verifier.verify(conns, downloadPath, pars)
        # TODO: extract download
    if cfg.get("automation", "extract") == "true":
        if vret != -1:
            extractor.extract(downloadPath)

# generate nfo file
if dret:
    vresult = ""
    if vret == None or vret == 2:
        vresult = "???"
    elif vret == 1:
        vresult = "Passed"
    elif vret == 3:
        vresult = "Repaired"
    else:
        vresult = "Failed"
    f = open(downloadPath + "newzBoy.nfo", "w")
    f.write(utils.nfo(folderName, dret["size"], dret["files"], dret["speed"],
                      dret["time"], vresult))