def prepare_data():
    """Fetch the newest PHP-tagged Stack Overflow questions and hand the raw
    JSON to the helper for preparation.

    Clears existing data first via data.clean(). The Stack Exchange API
    usually gzip-compresses responses, so the body is decompressed when the
    Content-Encoding header says so.
    """
    data.clean()
    url = ("https://api.stackexchange.com/2.2/questions?page=1&pagesize=99"
           "&order=desc&sort=creation&tagged=php&site=stackoverflow")
    request = urllib2.Request(url)
    response = urllib2.urlopen(request)
    if response.info().get('Content-Encoding') == 'gzip':
        buf = StringIO(response.read())
        f = gzip.GzipFile(fileobj=buf)
        result = f.read()
    else:
        # BUG FIX: previously `result` was undefined (NameError) whenever the
        # response was not gzip-encoded.
        result = response.read()
    helper.prepare_data(result)
def split_and_write(file1, data1, file2, data2):
    """Append the cleaned, encoded forms of data1 and data2 to their
    respective files (opened in text-append mode).

    A large commented-out experiment (fuzzy matching + aligned splitting)
    was removed as dead code; only the unconditional clean-and-write path
    was ever live.
    """
    # One `with` handles both files; both are closed even on write errors.
    with open(file1, "at") as f1, open(file2, "at") as f2:
        f1.write(encode(clean(data1)))
        f2.write(encode(clean(data2)))
def clean_up():
    """For use when an emergency problem causes an unsafe shutdown.

    Clears out system files and returns to a stable state. Always exits the
    interpreter via sys.exit() — this function never returns.
    """
    logging.info('Cleaning up system.')
    logging.debug('Changing working directory.')
    os.chdir('./secner/bin/')
    logging.debug('Requesting permission to continue.')
    permission = helpers.query(
        'Note that running this proccess will result in deletion of potentially valuable content. Are you absolutely sure you would wish to proceed?'
    )
    if not permission:
        logging.debug('User did not permit running the service.')
        sys.exit('Process stopped by user.')
    logging.debug('Permission recieved. Continuing with process.')

    def _clear(path, label, command):
        """Run a shell removal command for `path` if it exists, reporting
        failure through error(). Status 256 == shell exit code 1, which
        os.system reports for permission problems here."""
        if os.path.exists(path):
            logging.debug('Clearing %s.' % label)
            status = os.system(command)
            if status == 256:
                error('Permission denied.')
            elif status != 0:
                error('Something went wrong.')

    # Same commands as before: indexes/ is removed entirely, the other two
    # directories only have their contents removed.
    _clear('./indexes/', 'indexes', 'rm -rf ./indexes/')
    _clear('./tests/', 'tests', 'rm -rf ./tests/*')
    _clear('./.secner-helpers/', 'secner-helpers', 'rm -rf ./secner-helpers/*')
    try:
        logging.debug('Clearing Neo4j Server.')
        data.clean(config.NEO4J_SERVER)
    except Exception:
        # BUG FIX: the old handler did `sys.exc_type + ':' + sys.exc_value`,
        # which raises TypeError (type + str) and those attributes were
        # removed in Python 3. logging.exception() already records the
        # active exception and traceback.
        logging.error('Something went wrong while clearing the Neo4j server.')
        logging.exception('Neo4j cleanup failed')
        sys.exit(
            'Something went wrong while clearing the Neo4j server. Check logs for details.'
        )
    logging.debug('Clearing complete.')
    sys.exit('System cleaned. Factory state returned.')
def process(data, url=None):
    """Parse a No Fear Shakespeare comparison page and pickle each utterance.

    For every row of the comparison table, extract the speaker, the original
    (Shakespeare) line and the modern translation, print both, and append a
    record dict to 'utterances.pickle'.
    """
    tree = html.fromstring(data)
    rows = tree.xpath(
        '//div[@id="noFear-comparison"]/table[@class="noFear"]/tr')
    for row in rows:
        speaker = row.xpath('td[@class="noFear-left"]/b/text()')
        # Original-language cell: flatten <a> wrappers, then take the text.
        original_line = ""
        for cell in row.xpath(
                'td[@class="noFear-left"]/div[@class="original-line"]'):
            etree.strip_tags(cell, 'a')
            original_line += ' ' + cell.text_content()
        original_line = clean(original_line)
        print(f"Shakespeare: {original_line}")
        # Modern-translation cell: drop <span> annotations entirely, then
        # flatten <a> wrappers.
        modern_line = ""
        for cell in row.xpath(
                'td[@class="noFear-right"]/div[@class="modern-line"]'):
            etree.strip_elements(cell, 'span', with_tail=False)
            etree.strip_tags(cell, 'a')
            modern_line += ' ' + cell.text_content()
        modern_line = clean(modern_line)
        print(f"Modern: {modern_line}")
        record = {
            "speaker": speaker,
            "shakespeare": original_line,
            "modern": modern_line,
            "url": url
        }
        with open('utterances.pickle', 'a+b') as f:
            pickle.dump(record, f)
def sample(self, step, temperature=1., init=None):
    """Generate one sample, clean it, write it out as MIDI, and return it.

    The piano roll is saved via save_roll() and as Samples/<step>.mid.
    Returns the cleaned roll, transposed and expanded to a leading batch
    axis of 1.
    """
    if not os.path.isdir('Samples'):
        os.mkdir('Samples')
    piano_roll = clean(self.generate(temperature, init))
    save_roll(piano_roll, step)
    out_file = 'Samples/{}.mid'.format(step)
    midi = piano_rolls_to_midi(piano_roll)
    midi.write(out_file)
    tqdm.write('Saved to ' + out_file)
    return np.expand_dims(piano_roll.T, axis=0)
def clean_up():
    """For use when an emergency problem causes an unsafe shutdown.

    Clears out system files and returns to a stable state. Exits the
    interpreter on every path (sys.exit), so it never returns normally.
    """
    logging.info('Cleaning up system.')
    logging.debug('Changing working directory.')
    os.chdir('./secner/bin/')
    logging.debug('Requesting permission to continue.')
    permission = helpers.query('Note that running this proccess will result in deletion of potentially valuable content. Are you absolutely sure you would wish to proceed?')
    if not permission:
        logging.debug('User did not permit running the service.')
        sys.exit('Process stopped by user.')
    logging.debug('Permission recieved. Continuing with process.')
    if os.path.exists('./indexes/'):
        logging.debug('Clearing indexes.')
        status = os.system('rm -rf ./indexes/')
        if status == 256:  # shell exit code 1 -> permission problem
            error('Permission denied.')
        elif status != 0:
            error('Something went wrong.')
    if os.path.exists('./tests/'):
        logging.debug('Clearing tests.')
        status = os.system('rm -rf ./tests/*')
        if status == 256:
            error('Permission denied.')
        elif status != 0:
            error('Something went wrong.')
    if os.path.exists('./.secner-helpers/'):
        logging.debug('Clearing secner-helpers.')
        status = os.system('rm -rf ./secner-helpers/*')
        if status == 256:
            error('Permission denied.')
        elif status != 0:
            error('Something went wrong.')
    try:
        logging.debug('Clearing Neo4j Server.')
        data.clean(config.NEO4J_SERVER)
    except Exception:
        # BUG FIX: `sys.exc_type + ':' + sys.exc_value` raised TypeError
        # (type object + string) and those attributes do not exist in
        # Python 3. logging.exception() logs the traceback itself.
        logging.exception('Something went wrong while clearing the Neo4j server.')
        sys.exit('Something went wrong while clearing the Neo4j server. Check logs for details.')
    logging.debug('Clearing complete.')
    sys.exit('System cleaned. Factory state returned.')
def main():
    """Run the interview dialog loop end-to-end and produce a report.

    Records audio turns, sends them to Dialogflow until poke() signals the
    dialog is over, then cleans the collected subject info, scores it, and
    writes a report. Removed the commented-out "quick"/text/stream dialog
    variants (dead code).
    """
    # NOTE(review): hardcoded service-account key path — should come from
    # configuration/environment, not source.
    os.environ[
        "GOOGLE_APPLICATION_CREDENTIALS"] = "./resources/credentials/e3682f457e02.json"
    os.system("cls")  # clears the console; Windows-only command
    project_id = "recruitertest-dd3ab"
    session_id = str(uuid.uuid4().hex[:12])
    language_code = "en-US"
    input_file_path = "./resources/audio/subject_input.wav"
    # complete_transcript holds two parallel lists (presumably
    # questions/answers — confirm against report.create).
    complete_transcript = [[], []]
    # Kick off the dialog with the start event.
    detect_intent_texts(project_id, session_id, ["DEMO_START"], language_code)
    while True:
        audio.record(input_file_path)
        partial_transcript = detect_intent_audio(project_id, session_id,
                                                 input_file_path, language_code)
        complete_transcript[0] = complete_transcript[0] + partial_transcript[0]
        complete_transcript[1] = complete_transcript[1] + partial_transcript[1]
        if poke(project_id, session_id, language_code):
            break
    # Collect, clean, score, and report the gathered subject data.
    subject_info = get_subject_info(project_id, session_id, language_code)
    clean_subject_info = data.clean(subject_info)
    match_scores = data.match(subject_info)
    report.create(clean_subject_info, match_scores, complete_transcript,
                  session_id)
def sample(self, step, temperature=1., init=None, nonzero=None, diff=None,
           nonzero_diff=None, condition=None, length=2048):
    """Generate a conditioned sample, clean it, save it as MIDI, and return it.

    The generated tensor is detached and moved to CPU before conversion.
    `diff` is accepted for interface compatibility but not used here.
    Returns the cleaned roll, transposed, with a leading batch axis of 1.
    """
    if not os.path.isdir('Samples'):
        os.mkdir('Samples')
    generated = self.generate(temperature, init, nonzero, nonzero_diff,
                              condition, length).detach().cpu().numpy()
    generated = clean(generated)
    save_roll(generated, step)
    target = 'Samples/{}.mid'.format(step)
    midi = piano_rolls_to_midi(generated)
    midi.write(target)
    tqdm.write('Saved to {}'.format(target))
    return np.expand_dims(generated.T, axis=0)
def OnInit(self):
    # GUI/OpenGL init callback (Python 2: uses `print` statement and raw_input).
    # Loads a dataset name from swamp.txt, fetches and cleans the data,
    # converts it to coordinates, compiles a trivial shader pair, and builds
    # a VBO from the coordinate array.
    # NOTE(review): Datlocal is never closed — consider a `with` block.
    Datlocal = open('swamp.txt', 'r')
    name = Datlocal.read(8)  # first 8 characters identify the dataset
    lake = data.get(name)
    laker = data.clean(lake)
    dat = coords(laker)
    datas = array(dat)
    print datas
    raw_input("wait:")  # pause so the printed data can be inspected
    # Pass-through vertex shader: standard modelview-projection transform.
    VERTEX_SHADER = shaders.compileShader(
        """ void main() { gl_Position = gl_ModelViewProjectionMatrix * gl_Vertex; }""",
        GL_VERTEX_SHADER)
    # Fragment shader: constant green.
    FRAGMENT_SHADER = shaders.compileShader(
        """ void main() { gl_FragColor = vec4( 0, 1, 0, 1 ); }""",
        GL_FRAGMENT_SHADER)
    self.shader = shaders.compileProgram(VERTEX_SHADER, FRAGMENT_SHADER)
    self.vbo = vbo.VBO(array(datas, 'f'))  # float32 vertex buffer
def OnInit( self):
    # Duplicate of the OnInit above: load coordinates from swamp.txt,
    # compile a pass-through/green shader program, and build a float VBO.
    # (Python 2 code: `print` statement, raw_input.)
    Datlocal = open('swamp.txt','r')
    name = Datlocal.read(8)  # dataset key is the first 8 characters
    lake = data.get(name)
    laker = data.clean(lake)
    dat = coords(laker)
    datas = array(dat)
    print datas
    raw_input("wait:")  # manual pause for inspection
    # Vertex shader: modelview-projection transform only.
    VERTEX_SHADER = shaders.compileShader(""" void main() { gl_Position = gl_ModelViewProjectionMatrix * gl_Vertex; }""", GL_VERTEX_SHADER)
    # Fragment shader: solid green output.
    FRAGMENT_SHADER = shaders.compileShader(""" void main() { gl_FragColor = vec4( 0, 1, 0, 1 ); }""", GL_FRAGMENT_SHADER)
    self.shader = shaders.compileProgram(VERTEX_SHADER,FRAGMENT_SHADER)
    self.vbo = vbo.VBO( array( datas,'f') )  # float32 vertex buffer
import sys

# Local project modules live under src/, which is not on sys.path by default.
sys.path.append('src')

from data import read_raw_data, clean, vectorize
from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split
from visualisation import plot_precision_recall_vs_threshold
from sklearn.metrics import confusion_matrix

if __name__ == "__main__":
    # Train an XGBoost classifier on name -> name_generic labels.
    # NOTE(review): Windows-style path separators; confirm intended platform.
    df = read_raw_data('data\\raw\\A_training data.csv')
    df = clean(df)
    features = df.name.values
    labels = df.name_generic.values
    # Stratified 80/20 split preserves the class balance in both sets.
    train_features, test_features, train_labels, test_labels = train_test_split(
        features, labels, stratify=labels, test_size=0.2)
    # Fit the vectorizer on training data only; reuse it for the test set.
    train_features, vectorizer = vectorize(train_features)
    test_features = vectorizer.transform(test_features)
    # Up-weight the positive class by the negative/positive ratio to
    # compensate for class imbalance.
    weight = (train_labels == 0).sum() / (train_labels == 1).sum()
    clf = XGBClassifier(scale_pos_weight=weight,
                        max_depth=9,
                        n_estimators=300,
                        learning_rate=0.35,
                        gamma=0.08,
                        subsample=0.7)
    clf.fit(train_features, train_labels)
    predictions = clf.predict_proba(test_features)
# Interactive PDB-renderer preparation script (Python 2: string.upper,
# raw_input). Asks the user whether to prepare the loaded file, reads a
# point size, then writes normals and cleaned atom data to an output file.
# NOTE(review): nesting reconstructed — everything after the 'Y' check is
# assumed to be inside the `if`, since numInput/sizeConv would otherwise be
# undefined on the 'n' path; confirm against the original layout.
cont = False
pInput = string.upper(raw_input("File Loaded! Would you like to prepare it for rendering?(y/n)"))
if pInput == 'Y':
    cont = False
    # Loop until the input looks like a float below 1 (contains "0.").
    while cont == False:
        numInput = str(raw_input("Please input point size as a float less than 1:"))
        if "0." in numInput:
            print("Size recognised")
            cont = True
        elif "0." not in numInput:
            print("You didn't enter a float less than 1. Try Again")
            cont = False
    sizeConv = float(numInput)
    print("PDB Renderer will now prepare the data")
    darter = data.clean(dart)  # `dart` is loaded earlier in the file
    forge = extStrip(fInput)   # output filename derived from the input name
    print(forge)
    # NOTE(review): `heckler` is never closed in this fragment.
    heckler = open(forge, "w")
    # Write the six axis-aligned unit normals to the output file.
    top = print_normal((0, 1, 0), heckler)
    bottom = print_normal((0, -1, 0), heckler)
    left = print_normal((-1, 0, 0), heckler)
    right = print_normal((1, 0, 0), heckler)
    front = print_normal((0, 0, -1), heckler)
    back = print_normal((0, 0, 1), heckler)
    for lline in darter:
        print(lline)
    coordData = render.coords(darter)
    noot = []
    for atom in coordData:
        print(atom)
def main(args):
    """ Main function to run experiment on PARIS """
    # fold_number == "full" runs all five cross-validation folds and prints
    # aggregated mean/std metrics; any other value runs a single experiment.
    if args.fold_number == "full":
        valid_list = []
        test_list = []
        truth_list = []
        for fold in range(1, 6):
            args.fold_number = str(fold)  # reused by seed_kgs below
            triples1, triples2, folder = data.load_kgs(args.training_data,
                                                       args.dataset_type,
                                                       args.triples_type,
                                                       args.dataset_ea)
            # Either seed the KGs with known alignments or just add a point.
            if args.seed:
                kg1_path, kg2_path = data.seed_kgs(args.dataset_type, triples1,
                                                   triples2, folder,
                                                   args.new_seed,
                                                   args.dataset_division,
                                                   args.fold_number, args.frac)
            else:
                kg1_path, kg2_path = data.add_point(triples1, triples2, folder)
            # Delete old PARIS directory and create again to be empty
            out_path = args.output + "PARIS/" + args.dataset_type + "/" + args.dataset_ea
            if os.path.exists(out_path):
                shutil.rmtree(out_path)
            os.mkdir(out_path)
            # Run PARIS (external Java matcher) and capture its stdout.
            print("Running PARIS...")
            out_paris = os.popen(
                "java -jar ../resources/paris_0_3.jar {kg1_path} {kg2_path} {out_path}".format(
                    kg1_path=kg1_path, kg2_path=kg2_path, out_path=out_path
                )
            ).read()
            print("PARIS output:\n", out_paris)
            print("\nComputing metrics")
            # OpenEA datasets return per-split precision/recall/F1 tuples;
            # other dataset types only print their metrics.
            if args.dataset_type == "OpenEA_dataset":
                prec_truth, rec_truth, f1_truth, prec_test, rec_test, f1_test, prec_valid, rec_valid, f1_valid = \
                    check_result(folder, args.dataset_type, out_path,
                                 args.dataset_division, args.fold_number)
                truth_list.append((prec_truth, rec_truth, f1_truth))
                test_list.append((prec_test, rec_test, f1_test))
                valid_list.append((prec_valid, rec_valid, f1_valid))
            else:
                check_result(folder, args.dataset_type, out_path)
            print("\nCleaning")
            # Fold-aware cleanup only applies to seeded (non-new-seed) runs.
            if args.seed and not args.new_seed:
                data.clean(folder, args.dataset_type, out_path,
                           args.dataset_division, args.fold_number)
            else:
                data.clean(folder, args.dataset_type, out_path)
        # Aggregate metrics over the five folds (mean and std per split).
        print("Truth average precision:", np.mean(np.array([t[0] for t in truth_list])))
        print("Truth std precision:", np.std(np.array([t[0] for t in truth_list])))
        print("Truth average recall:", np.mean(np.array([t[1] for t in truth_list])))
        print("Truth std recall:", np.std(np.array([t[1] for t in truth_list])))
        print("Truth average f1:", np.mean(np.array([t[2] for t in truth_list])))
        print("Truth std f1:", np.std(np.array([t[2] for t in truth_list])))
        print("Test average precision:", np.mean(np.array([t[0] for t in test_list])))
        print("Test std precision:", np.std(np.array([t[0] for t in test_list])))
        print("Test average recall:", np.mean(np.array([t[1] for t in test_list])))
        print("Test std recall:", np.std(np.array([t[1] for t in test_list])))
        print("Test average f1:", np.mean(np.array([t[2] for t in test_list])))
        print("Test std f1:", np.std(np.array([t[2] for t in test_list])))
        print("Valid average precision:", np.mean(np.array([t[0] for t in valid_list])))
        print("Valid std precision:", np.std(np.array([t[0] for t in valid_list])))
        print("Valid average recall:", np.mean(np.array([t[1] for t in valid_list])))
        print("Valid std recall:", np.std(np.array([t[1] for t in valid_list])))
        print("Valid average f1:", np.mean(np.array([t[2] for t in valid_list])))
        print("Valid std f1:", np.std(np.array([t[2] for t in valid_list])))
    else:
        # Single-run path: same pipeline as one fold iteration above.
        triples1, triples2, folder = data.load_kgs(args.training_data,
                                                   args.dataset_type,
                                                   args.triples_type,
                                                   args.dataset_ea)
        if args.seed:
            kg1_path, kg2_path = data.seed_kgs(args.dataset_type, triples1,
                                               triples2, folder, args.new_seed,
                                               args.dataset_division,
                                               args.fold_number, args.frac)
        else:
            kg1_path, kg2_path = data.add_point(triples1, triples2, folder)
        # Delete old PARIS directory and create again to be empty
        out_path = args.output + "PARIS/" + args.dataset_type + "/" + args.dataset_ea
        if os.path.exists(out_path):
            shutil.rmtree(out_path)
        os.mkdir(out_path)
        # Run PARIS
        print("Running PARIS...")
        out_paris = os.popen(
            "java -jar ../resources/paris_0_3.jar {kg1_path} {kg2_path} {out_path}".format(
                kg1_path=kg1_path, kg2_path=kg2_path, out_path=out_path
            )
        ).read()
        print("PARIS output:\n", out_paris)
        print("\nComputing metrics")
        if args.dataset_type == "OpenEA_dataset":
            # Seeded OpenEA runs need the division/fold to locate the split.
            if args.seed:
                check_result(folder, args.dataset_type, out_path,
                             args.dataset_division, args.fold_number)
            else:
                check_result(folder, args.dataset_type, out_path)
        else:
            check_result(folder, args.dataset_type, out_path)
        print("\nCleaning")
        if args.seed and not args.new_seed:
            data.clean(folder, args.dataset_type, out_path,
                       args.dataset_division, args.fold_number)
        else:
            data.clean(folder, args.dataset_type, out_path)
    print("\nNothing else to do! Closing")