def parse_from_strings(name, code, pxds={}, level=None, initial_pos=None):
    """
    Utility method to parse a (unicode) string of code. This is mostly
    used for internal Cython compiler purposes (creating code snippets that
    transforms should emit, as well as unit testing).

    code - a unicode string containing Cython (module-level) code
    name - a descriptive name for the code source (to use in error messages etc.)
    """
    # Since source files carry an encoding, it makes sense in this context
    # to use a unicode string so that code fragments don't have to bother
    # with encoding. This means that test code passed in should not have an
    # encoding header.
    assert isinstance(code, unicode), "unicode code snippets only please"
    encoding = "UTF-8"

    module_name = name
    if initial_pos is None:
        initial_pos = (name, 1, 0)
    code_source = StringSourceDescriptor(name, code)

    context = StringParseContext([], name)
    scope = context.find_module(module_name, pos=initial_pos, need_pxd=0)

    buf = StringIO(code.encode(encoding))

    scanner = PyrexScanner(buf, code_source, source_encoding=encoding,
                           scope=scope, context=context, initial_pos=initial_pos)
    if level is None:
        tree = Parsing.p_module(scanner, 0, module_name)
    else:
        tree = Parsing.p_code(scanner, level=level)
    return tree
def parseKVs(self, kvl):
    """ Convert some form of keys to an OrderedDict.

    We are trying to be ridiculously flexible here. Take:
      - a string, which we parse as if it came from an ICC.
      - a list, which we parse either as a list of key=value strings
        or of (key, value) duples.
    """
    if isinstance(kvl, str):
        return Parsing.parseKVs(kvl)

    od = collections.OrderedDict()
    if kvl is not None:
        for i in kvl:
            if isinstance(i, str):
                k, v, junk = Parsing.parseKV(i)
                od[k] = v
            elif type(i) in (list, tuple) and len(i) == 2:
                k, v, junk = Parsing.parseKV("%s=%s" % i)
                od[k] = v
            else:
                CPL.log('Reply', 'kvl item is not a string: %r' % (i))
                raise Exception("kvl == %r" % (i))
    return od
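# A minimal usage sketch for parseKVs(), assuming a Reply-like object
# (hypothetical here) that defines the method above and that
# Parsing.parseKV()/parseKVs() behave as the docstring describes; the sample
# keywords are made up.
reply = Reply()
od1 = reply.parseKVs("temp=21.5")                      # raw ICC string form
od2 = reply.parseKVs(["temp=21.5", ("filter", "r")])   # mixed list form
print(list(od2.items()))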
def classify_doc(fileName):
    # Classifies the document as Positive, Negative, or Neutral based on
    # predetermined rules for financial sentiment analysis.
    # Returns the class name that this document belongs to.
    rtnClassification = None

    # Open the predefined rule base.
    try:
        file_rulebase = open("Rules/newRules.csv")
        RuleBase = csv.reader(file_rulebase, delimiter=',')
    except Exception as e:
        print("Cannot open RuleBase: Classification_Rules.csv",
              "\nCan't go further without this file.")
        exit()

    ruleBase = []
    for r in RuleBase:
        ruleBase.append(r)

    # Open the document and tokenize it by sentence.
    try:
        doc = codecs.open(fileName)
        content = doc.read()
        tokenizer = nltk.data.load('tokenizers/punkt/english.pickle')
        Sentences = tokenizer.tokenize(content)
    except Exception as e:
        print("Error opening inputted file.", e)
        exit()

    # Keeps track of the confidence of the classification. (Pos, Neg, Neu)
    Classes = [0, 0, 0]

    # Classify the document by classifying each sentence.
    for i in range(len(Sentences)):
        # Gets tags based on the pre-defined lexicon.
        sent_tags = parser.parse_sentence(Sentences[i])
        # Converts tags to a numerical representation.
        num_tags = parser.get_numerical_list(sent_tags)
        for r in ruleBase:
            rule_r = [int(s) for s in r[0].split() if s.isdigit()]
            if num_tags == rule_r:
                # Update confidence from the rule base for the matching class.
                Classes[int(r[1]) - 12] = Classes[int(r[1]) - 12] + float(r[2])
            else:
                for t in num_tags:
                    if rule_r == [t]:
                        Classes[int(r[1]) - 12] = Classes[int(r[1]) - 12] + float(r[2])

    if Classes[0] == max(Classes):
        return "Positive"
    elif Classes[1] == max(Classes):
        return "Negative"
    elif Classes[2] == max(Classes):
        return "Neutral"
    else:
        return "messed up"
def parse_from_strings(name, code, pxds={}, level=None, initial_pos=None,
                       context=None, allow_struct_enum_decorator=False):
    """
    Utility method to parse a (unicode) string of code. This is mostly
    used for internal Cython compiler purposes (creating code snippets that
    transforms should emit, as well as unit testing).

    code - a unicode string containing Cython (module-level) code
    name - a descriptive name for the code source (to use in error messages etc.)

    RETURNS

    The tree, i.e. a ModuleNode. The ModuleNode's scope attribute is
    set to the scope used when parsing.
    """
    if context is None:
        context = StringParseContext(name)
    # Since source files carry an encoding, it makes sense in this context
    # to use a unicode string so that code fragments don't have to bother
    # with encoding. This means that test code passed in should not have an
    # encoding header.
    assert isinstance(code, unicode), "unicode code snippets only please"
    encoding = "UTF-8"

    module_name = name
    if initial_pos is None:
        initial_pos = (name, 1, 0)
    code_source = StringSourceDescriptor(name, code)

    scope = context.find_module(module_name, pos=initial_pos, need_pxd=0)

    buf = StringIO(code)

    scanner = PyrexScanner(buf, code_source, source_encoding=encoding,
                           scope=scope, context=context, initial_pos=initial_pos)
    ctx = Parsing.Ctx(allow_struct_enum_decorator=allow_struct_enum_decorator)

    if level is None:
        tree = Parsing.p_module(scanner, 0, module_name, ctx=ctx)
        tree.scope = scope
        tree.is_pxd = False
    else:
        tree = Parsing.p_code(scanner, level=level, ctx=ctx)

    tree.scope = scope
    return tree
def reebok_parse(*, output=Parsing.database_size_layer_writer, ipp=120):
    if ipp not in (120, 24):
        raise ValueError('Unknown items per page value: {}'.format(ipp))

    soup_loader = SoupLoader(bot=True, use_proxies=True)
    ig = ReebokIg()
    parser = Parsing.BaseParser(get_offers_list=get_offers_list,
                                get_item_dict=ig,
                                soup_loader=soup_loader)

    size_list = get_reebok_sizes_list(soup_loader=soup_loader)
    links = Parsing.sl_link_gen(baselinks=reebok_baselinks,
                                sizes_list=size_list,
                                get_pg_lim=get_maxpage_func(ipp=ipp, soup_loader=soup_loader),
                                ipp=ipp)

    output(parser(links), "reebok")
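# A minimal usage sketch for reebok_parse(), assuming the Parsing module and
# the helper functions above are importable; the custom writer below is
# hypothetical and only shows how the `output` callable is invoked with
# (offers, label).
def print_writer(offers, label):
    for offer in offers:
        print(label, offer)

reebok_parse(output=print_writer, ipp=24)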
def parse_from_strings(name, code, pxds=None, level=None, initial_pos=None,
                       context=None, allow_struct_enum_decorator=False):
    """
    Utility method to parse a (unicode) string of code. This is mostly
    used for internal Cython compiler purposes (creating code snippets that
    transforms should emit, as well as unit testing).

    code - a unicode string containing Cython (module-level) code
    name - a descriptive name for the code source (to use in error messages etc.)

    RETURNS

    The tree, i.e. a ModuleNode. The ModuleNode's scope attribute is
    set to the scope used when parsing.
    """
    if pxds is None:
        pxds = {}

    if context is None:
        context = StringParseContext(name)
    # Since source files carry an encoding, it makes sense in this context
    # to use a unicode string so that code fragments don't have to bother
    # with encoding. This means that test code passed in should not have an
    # encoding header.
    assert isinstance(code, unicode), "unicode code snippets only please"
    encoding = "UTF-8"

    module_name = name
    if initial_pos is None:
        initial_pos = (name, 1, 0)
    code_source = StringSourceDescriptor(name, code)

    scope = context.find_module(module_name, pos=initial_pos, need_pxd=0)

    buf = StringIO(code)

    scanner = PyrexScanner(buf, code_source, source_encoding=encoding,
                           scope=scope, context=context, initial_pos=initial_pos)
    ctx = Parsing.Ctx(allow_struct_enum_decorator=allow_struct_enum_decorator)

    if level is None:
        tree = Parsing.p_module(scanner, 0, module_name, ctx=ctx)
        tree.scope = scope
        tree.is_pxd = False
    else:
        tree = Parsing.p_code(scanner, level=level, ctx=ctx)

    tree.scope = scope
    return tree
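# A minimal usage sketch for parse_from_strings(), assuming it is called from
# inside the Cython compiler package where StringParseContext, PyrexScanner
# and Parsing are importable; the code fragment itself is arbitrary.
tree = parse_from_strings("test_fragment", u"x = 1\nprint(x)\n")
print(tree.is_pxd)   # False for module-level code
print(tree.scope)    # the module scope used while parsing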
def parse(self, source_desc, scope, pxd, full_module_name):
    if not isinstance(source_desc, FileSourceDescriptor):
        raise RuntimeError("Only file sources for code supported")
    source_filename = source_desc.filename
    scope.cpp = self.cpp
    # Parse the given source file and return a parse tree.
    try:
        f = Utils.open_source_file(source_filename, "rU")
        try:
            import Parsing
            s = PyrexScanner(f, source_desc, source_encoding=f.encoding,
                             scope=scope, context=self)
            tree = Parsing.p_module(s, pxd, full_module_name)
        finally:
            f.close()
    except UnicodeDecodeError, msg:
        #import traceback
        #traceback.print_exc()
        error((source_desc, 0, 0),
              "Decoding error, missing or incorrect coding=<encoding-name> at top of source (%s)" % msg)
def get_helen_test_data(query_label_names, aug_setting_name):
    return ps.Dataset('HELENRelabeled_wo_pred',
                      category='test',
                      aug_ids=[0],
                      aug_setting_name=aug_setting_name,
                      query_label_names=query_label_names)
def _make_matrix(self):
    first_row = [
        "sentence", "Pos", "Neg", "LagInd", "LeadInd", "LagInd::Up",
        "LagInd::Down", "LeadInd::Up", "LeadInd::Down", "Up", "Down", "Class"
    ]
    parse_tags = []
    print("here")
    with open(self.fileName, encoding="ISO-8859-1") as f:
        content = f.readlines()

    class_tags = []
    for c in content:
        split_c = c.split("@", 1)
        class_tags.append(split_c[1])
        tempTags = parser.parse_sentence(split_c[0])
        parse_tags.append(tempTags)

    cor_class_tags = []
    for c in class_tags:
        if "positive" in c:
            cor_class_tags.append("Positive")
        elif "negative" in c:
            cor_class_tags.append("Negative")
        elif "neutral" in c:
            cor_class_tags.append("Neutral")
        else:
            print("ERROR")

    if len(parse_tags) == len(cor_class_tags):
        print("good to go")
    else:
        print("ERROR: number of parsed sentences does not match number of class tags")

    #for tags in parse_tags:
    #    tag_str = get_binary(tags)

    train_mat = []
    other_mat = []
    for i in range(len(parse_tags)):
        tag_str = get_numerical(parse_tags[i], cor_class_tags[i])
        train_mat.append((i + 1, parse_tags[i], cor_class_tags[i]))
        other_mat.append((tag_str, cor_class_tags[i]))

    '''
    with open(self.csvFileName, 'w') as csvFile:
        writer = csv.writer(csvFile)
        writer.writerows(first_row)
        writer.writerows(train_mat)
        csvFile.close()
    '''

    with open(self.csvFileName, 'w') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerows(other_mat)

    print("training matrix is done and has been saved")
def gen_training_data(self,
                      query_label_names,
                      aug_setting_name='aug_512_0.8',
                      dataset_names=[]):
    datasets = []
    if len(dataset_names) == 0:
        dataset_names = ['HELENRelabeled', 'MultiPIE', 'HangYang', 'Portrait724']
    for dataset_name in dataset_names:
        datasets.append(
            ps.Dataset(dataset_name,
                       category='train',
                       aug_ids=[0, 1, 2, 3],
                       aug_setting_name=aug_setting_name,
                       query_label_names=query_label_names))
    return ps.CombinedDataset(datasets)
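# A minimal usage sketch, assuming these helpers live on a data-provider class
# (hypothetical name DataProvider) and that the `ps` dataset package is
# importable; the label names are illustrative only.
provider = DataProvider()
train_set = provider.gen_training_data(query_label_names=['face', 'hair'])
test_set = get_helen_test_data(['face', 'hair'], aug_setting_name='aug_512_0.8')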
def update_db():
    collection = DB['chart_info']
    if collection.count() == 0:
        insert_chart()
        Parsing.delete_album_art()
        Parsing.download_album_arts()
    else:
        remove_documents()
        insert_chart()
        Parsing.delete_album_art()
        Parsing.download_album_arts()
def classify_sentence(sentence):
    # Open predefined Rule Base.
    try:
        file_rulebase = open("Rules/newRules.csv")
        RuleBase = csv.reader(file_rulebase, delimiter=',')
    except Exception as e:
        print("Cannot open RuleBase: Classification_Rules.csv",
              "\nCan't go further without this file.")
        exit()

    ruleBase = []
    for r in RuleBase:
        ruleBase.append(r)

    # Keeps track of the confidence of the classification. (Pos, Neg, Neu)
    Classes = [0, 0, 0]

    # Gets tags based on pre-defined lexicon.
    sent_tags = parser.parse_sentence(sentence)
    # Converts tags to numerical representation.
    num_tags = parser.get_numerical_list(sent_tags)

    for r in ruleBase:
        rule_r = [int(s) for s in r[0].split() if s.isdigit()]
        if num_tags == rule_r:
            # Update confidence from RuleBase to correct class
            Classes[int(r[1]) - 12] = Classes[int(r[1]) - 12] + float(r[2])
        else:
            for t in num_tags:
                if rule_r == [t]:
                    Classes[int(r[1]) - 12] = Classes[int(r[1]) - 12] + float(r[2])

    if Classes[0] == max(Classes):
        return "positive"
    elif Classes[1] == max(Classes):
        return "negative"
    elif Classes[2] == max(Classes):
        return "neutral"
    else:
        print("oooof")
        return "messed up"
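# A minimal usage sketch for classify_sentence(), assuming Rules/newRules.csv
# and the lexicon-backed `parser` module used above are available; the example
# sentence is arbitrary.
label = classify_sentence("Quarterly revenue rose while costs fell.")
print(label)   # "positive", "negative", "neutral", or "messed up"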
def main():
    # Pretty-printer for debugging and visualizing data usage.
    pp = pprint.PrettyPrinter(indent=2)
    # List all files in the cwd.
    os.listdir('.')
    new_parser = pa.DataParser()
    # Empty dict that will contain subdicts representing each row.
    data_set = new_parser.parse_csv('volunteer_sample_2.csv')
    pp.pprint(data_set)
def parse(self, source_filename, scope, pxd):
    # Parse the given source file and return a parse tree.
    f = open(source_filename, "rU")
    s = PyrexScanner(f, source_filename, scope = scope, context = self)
    try:
        tree = Parsing.p_module(s, pxd)
    finally:
        f.close()
    if Errors.num_errors > 0:
        raise CompileError
    return tree
def parse(self, source_filename, scope, pxd):
    # Parse the given source file and return a parse tree.
    f = open(source_filename, "rU")
    s = PyrexScanner(f, source_filename, scope=scope, context=self)
    try:
        tree = Parsing.p_module(s, pxd)
    finally:
        f.close()
    if Errors.num_errors > 0:
        raise CompileError
    return tree
def parse(self, source_desc, scope, pxd, full_module_name):
    if not isinstance(source_desc, FileSourceDescriptor):
        raise RuntimeError("Only file sources for code supported")
    source_filename = source_desc.filename
    scope.cpp = self.cpp
    # Parse the given source file and return a parse tree.
    num_errors = Errors.num_errors
    try:
        f = Utils.open_source_file(source_filename, "rU")
        try:
            import Parsing
            s = PyrexScanner(f, source_desc, source_encoding=f.encoding,
                             scope=scope, context=self)
            tree = Parsing.p_module(s, pxd, full_module_name)
        finally:
            f.close()
    except UnicodeDecodeError, e:
        #import traceback
        #traceback.print_exc()
        line = 1
        column = 0
        msg = e.args[-1]
        position = e.args[2]
        encoding = e.args[0]

        f = open(source_filename, "rb")
        try:
            byte_data = f.read()
        finally:
            f.close()

        # FIXME: make this at least a little less inefficient
        for idx, c in enumerate(byte_data):
            if c in (ord('\n'), '\n'):
                line += 1
                column = 0
            if idx == position:
                break
            column += 1

        error((source_desc, line, column),
              "Decoding error, missing or incorrect coding=<encoding-name> "
              "at top of source (cannot decode with encoding %r: %s)" % (encoding, msg))
def parse(self, source_desc, scope, pxd, full_module_name):
    if not isinstance(source_desc, FileSourceDescriptor):
        raise RuntimeError("Only file sources for code supported")
    source_filename = Utils.encode_filename(source_desc.filename)
    # Parse the given source file and return a parse tree.
    try:
        f = Utils.open_source_file(source_filename, "rU")
        try:
            s = PyrexScanner(f, source_desc, source_encoding = f.encoding,
                             scope = scope, context = self)
            tree = Parsing.p_module(s, pxd, full_module_name)
        finally:
            f.close()
    except UnicodeDecodeError, msg:
        #import traceback
        #traceback.print_exc()
        error((source_desc, 0, 0),
              "Decoding error, missing or incorrect coding=<encoding-name> at top of source (%s)" % msg)
def parse(self, source_desc, scope, pxd, full_module_name):
    if not isinstance(source_desc, FileSourceDescriptor):
        raise RuntimeError("Only file sources for code supported")
    source_filename = source_desc.filename
    scope.cpp = self.cpp
    # Parse the given source file and return a parse tree.
    num_errors = Errors.num_errors
    try:
        f = Utils.open_source_file(source_filename, "rU")
        try:
            import Parsing
            s = PyrexScanner(f, source_desc, source_encoding=f.encoding,
                             scope=scope, context=self)
            tree = Parsing.p_module(s, pxd, full_module_name)
        finally:
            f.close()
    except UnicodeDecodeError, e:
        # import traceback
        # traceback.print_exc()
        line = 1
        column = 0
        msg = e.args[-1]
        position = e.args[2]
        encoding = e.args[0]

        f = open(source_filename, "rb")
        try:
            byte_data = f.read()
        finally:
            f.close()

        # FIXME: make this at least a little less inefficient
        for idx, c in enumerate(byte_data):
            if c in (ord("\n"), "\n"):
                line += 1
                column = 0
            if idx == position:
                break
            column += 1

        error(
            (source_desc, line, column),
            "Decoding error, missing or incorrect coding=<encoding-name> "
            "at top of source (cannot decode with encoding %r: %s)" % (encoding, msg),
        )
def tmp():
    # Needed so that "Optimize Imports" does not strip these imports.
    time()
    urllib()
    bs4()
    Category()
    Deepl()
    FindDigits()
    Html()
    LoadDictFromFile()
    Parsing()
    Product()
    SaveDictToFile()
    Sw()
    WorkWithJSON()
    print()
    datetime()
    quote()
    urljoin()
def insert_chart():
    collection = DB['chart_info']
    chart_info = Parsing.get_chart_info()
    docs = []
    for i in range(0, 100):
        song_name = chart_info[i]['title']
        singer = chart_info[i]['artist']
        req_path = '/static/images/' + str(i+1) + '.jpg'
        insert_data = {
            'rank': i+1,
            'title': song_name,
            'artist': singer,
            'request_url': req_path
        }
        docs.append(insert_data)
    collection.insert_many(docs)
    print('insert done')
def match(self, opts):
    """ Searches the parsed argument OrderedDict (self.argDict) for matches.

    Args:
       opts - a list of duples to match against. The duple parts are the
              option name and a converter. If the converter is None, the
              option takes no argument.

    Returns:
       matches   - an OrderedDict of the matched options, with converted arguments.
       unmatched - a list of unmatched options from opts.
       leftovers - an OrderedDict of unmatched options from argv.

    Raises:
       Error - Any parsing or conversion error.
    """
    self.parseArgs()
    return Parsing.match(self.argDict, opts)
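# A minimal usage sketch for match(), assuming `cmd` is an instance of the
# (hypothetical) command class that defines parseArgs()/match() above and
# whose .cmd string has already been set; the option names and converters are
# illustrative.
matches, unmatched, leftovers = cmd.match([("time", float), ("dark", None)])
print(matches, unmatched, leftovers)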
def check_random_row(self, file_name):
    """
    Check that all the values in a random row of the csv file are equivalent
    to its corresponding entry in the data_set dict.

    file_name - Name of the file to be checked.
    """
    data_set = pa.DataParser().parse_csv(file_name)
    test_tup = self.get_CSV_Reader(file_name)

    # Pick a random row and iterate to it.
    row_num = random.randrange(2, 200)
    row = None
    for x in range(row_num):
        row = next(test_tup[1])

    # Check that all of the variables in the csv row are equivalent to those
    # in that row's entry in the data_set dictionary.
    entry = data_set[row_num]
    for col, key in zip(row, entry.keys()):
        self.assertEqual(col, entry[key])
    test_tup[0].close()
def scan(self, input):
    syms = {
        "+": TokenPlus,
        "-": TokenMinus,
        "*": TokenStar,
        "/": TokenSlash
    }
    for word in input.split(" "):
        if word in syms:
            token = syms[word](self)
        else:
            # Try to convert to an integer.
            try:
                i = int(word)
            except ValueError:
                raise Parsing.SyntaxError("Unrecognized token: %s" % word)
            token = TokenInt(self, i)
        # Feed the token to the parser.
        self.token(token)
    # Tell the parser that the end of input has been reached.
    self.eoi()
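# A minimal usage sketch, assuming this scan() method belongs to a Parsing.Lr
# (or Parsing.Glr) subclass whose token and nonterminal specs are defined
# elsewhere in the module; the subclass name and `spec` object are
# hypothetical here.
calc = CalcParser(spec)
calc.scan("2 + 3 * 4")
# The grammar's reduction callbacks determine where the computed result ends up.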
def decision_generator(self):
    '''Reads dna to decide next course of action. Outputs verbiage'''
    parser = P.Parser(self.dna)
    while self.alive:
        try:
            thought = next(parser)
            sd.print3("{0.name}'s thought process: \n{thought}",
                      self, thought=thought.tree)
            sd.print3('which required {0.icount} instructions and {0.skipped} '
                      'instructions skipped over', thought)
            self.instr_used += thought.icount
            self.instr_skipped += thought.skipped
        except P.TooMuchThinkingError as tmt:
            sd.print1('{.name} was paralyzed by analysis and died', self)
            self.energy = 0
            yield Creature.wait_action, tmt.icount + tmt.skipped
            continue
        decision = evaluate(self, thought.tree)
        sd.print2('{.name} decided to {}', self, decision)
        yield decision, thought.icount + thought.skipped
    # Falling off the end of the generator stops iteration; an explicit
    # StopIteration here would become a RuntimeError under PEP 479.
    return
import os
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader

import Parsing
import Loading
import Model

if __name__ == "__main__":
    gpu = torch.cuda.is_available()

    # parse the arguments
    args = Parsing.Args()
    dataname = args.d
    lablname = args.l
    modlname = args.m
    epoch_num = int(args.e)
    nn_type = args.type

    # load the data and split into training and validation part
    train, valid = Loading.LoadTrn(dataname, lablname)
    train = Loading.DataSet(train, 0)
    valid = Loading.DataSet(valid, 0)
    train = DataLoader(train, batch_size=32, shuffle=True)
    valid = DataLoader(valid, batch_size=32, shuffle=False)
    print("[Done] Loading all data!")
from Connect import *
from ElementControl import *
from Parsing import *

if __name__ == '__main__':
    # restaurant lists
    snuUrls = []
    nkdUrls = []

    elem = Element()
    parser = Parsing()

    # Seoul Nat'l Univ. Station area
    print("Seoul Nat'l Univ. Station addresses")
    for i in range(1, 10):
        elem.searchPage('서울대입구', i)
        links = parser.getLink()
        for link in links:
            elem.searchDetail(link)
            parser.getData()

    # Nakseongdae Station area
    # print("Nakseongdae Station addresses")
    # for i in range(1, 4):
    #     elem.searchPage('낙성대', i)
    #     parser.getLink()

    # Sillim Station area
    # print("Sillim Station addresses")
### Install following to run properly
### pip install pprintpp

import json
from pprint import pprint
import Parsing
import sys

print(f'{sys.argv[1]}')

input_file_name = sys.argv[1]
output_file_name = sys.argv[2]
import_id = sys.argv[3]

with open(input_file_name, 'r') as file:
    html_content = file.read()

data = Parsing.twine_parse(html_content, import_id)
pprint(data)

json_object = json.dumps(data, indent=4)
with open(output_file_name, 'w') as file:
    file.write(json_object)
def parseArgs(self):
    """ Parse a raw command string into an OrderedDict in .argDict. """

    if not self.argDict:
        self.argDict = Parsing.parseArgs(self.cmd)
def train():
    # Parse training set and development set files
    train_sentences, train_labels = Parsing.parseDataset(TRAINING_SET_FILE, TRAINING_GOLD_FILE)
    dev_sentences, dev_labels = Parsing.parseDataset(DEV_SET_FILE, DEV_GOLD_FILE)
    print("Number of training sentences ", len(train_sentences))
    print("Number of development sentences ", len(dev_sentences))
    print()

    # Define the type of model to create
    hypernymsCompression = False  # If True, use the hypernym compression technique
    wordnetCompression = False    # If True, use the WordNet compression technique
    singleTaskLearning = True     # If False, use a multi-task network

    print("You are currently working with:\nWordnet Compression = %r\tHypernyms Compression = %r\tSingle-Task Learning = %r\n"
          % (wordnetCompression, hypernymsCompression, singleTaskLearning))

    # Depending on the type of compression, use different labels
    if wordnetCompression:
        print("Compressing labels\n")
        # Return compressed labels
        train_labels = Mappings.lemmasToSynsets(train_sentences, train_labels, True)
        dev_labels = Mappings.lemmasToSynsets(dev_sentences, dev_labels, False)
        OUTPUT_VOCABULARY_FILE = '../../resource/Mapping_Files/wordnet_output_vocabulary.txt'
    elif hypernymsCompression:
        # Return compressed labels
        print("Compressing labels\n")
        train_hypernym_labels = Hypernyms.sensekeysToHypernyms(train_sentences, train_labels)
        dev_hypernym_labels = Hypernyms.sensekeysToHypernyms(dev_sentences, dev_labels)
        OUTPUT_VOCABULARY_FILE = '../../resource/Mapping_Files/hypernyms_output_vocabulary.txt'
    else:
        Mappings.lemmaToSensekey(train_sentences, train_labels)
        OUTPUT_VOCABULARY_FILE = '../../resource/Mapping_Files/sensekey_output_vocabulary.txt'

    # Clean and order the training set
    print("Filter training set by removing useless sentences and order it")
    if hypernymsCompression:
        filtered_train_sentences, filtered_train_labels = CleanAndOrder.filterList(
            train_sentences, train_hypernym_labels)
    else:
        filtered_train_sentences, filtered_train_labels = CleanAndOrder.filterList(
            train_sentences, train_labels)
    print("Number of filtered training sentences ", len(filtered_train_sentences))
    print()

    train_sorted_sentences, train_sorted_labels, train_length_group = CleanAndOrder.sortAndGroup(
        filtered_train_sentences, filtered_train_labels, True)
    dev_sorted_sentences, dev_sorted_labels, dev_length_group = CleanAndOrder.sortAndGroup(
        dev_sentences, dev_labels, False)

    print("Retrieving mappings between WordNet synsets => BabelNet synsets, "
          "BabelNet synsets => WordNet Domains and BabelNet synsets => Lexical Names")
    mapping_file_list = [BABELNET_TO_WORDNET_FILE, BABELNET_TO_WNDOMAINS_FILE, BABELNET_TO_LEXNAMES_FILE]
    wordNet_to_babelNet, babelNet_to_wnDomain, babelNet_to_lexNames = Mappings.extractMappings(mapping_file_list)
    print("WordNet => BabelNet mapping length: ", len(wordNet_to_babelNet))
    print("BabelNet => Domain mapping length ", len(babelNet_to_wnDomain))
    print("BabelNet => Lexnames mapping length ", len(babelNet_to_lexNames))
    print()

    # Define the output vocabulary in order to map labels from strings to integers
    print("Retrieving output vocabulary")
    output_vocabulary = Vocabulary.extractOutputVocabulary(
        train_sorted_labels, dev_sorted_labels, OUTPUT_VOCABULARY_FILE)
    print("Size of output_vocabulary: %i\n" % len(output_vocabulary))

    if singleTaskLearning:
        print("Retrieving Babelnet, Domain and Lexname labels and vocabularies")
        train_bn_labels = Mappings.wnToBn(train_sorted_labels, wordNet_to_babelNet, wordnetCompression)
        dev_bn_labels = Mappings.wnToBn(dev_sorted_labels, wordNet_to_babelNet, wordnetCompression)
        train_domain_labels = Mappings.bnToWnDomain(train_bn_labels, babelNet_to_wnDomain)
        dev_domain_labels = Mappings.bnToWnDomain(dev_bn_labels, babelNet_to_wnDomain)
        train_lex_labels = Mappings.bnToWnLex(train_bn_labels, babelNet_to_lexNames)
        dev_lex_labels = Mappings.bnToWnLex(dev_bn_labels, babelNet_to_lexNames)
        bn_output_vocabulary, domain_output_vocabulary, lex_output_vocabulary = Vocabulary.multiTaskingVocabularies(
            train_bn_labels, train_domain_labels, train_lex_labels,
            dev_bn_labels, dev_domain_labels, dev_lex_labels)
        print("Size of Babelnet output_vocabulary: %i" % len(bn_output_vocabulary))
        print("Size of Domain output_vocabulary: %i" % len(domain_output_vocabulary))
        print("Size of Lexname output_vocabulary: %i" % len(lex_output_vocabulary))
        print()

    # Create the embeddings for the datasets
    ELMo.Module(TRAIN_EMBEDDING_FILE, train_length_group)
    print()
    ELMo.Module(DEV_EMBEDDING_FILE, dev_length_group)
    print()

    # Retrieve the training inputs and labels for the network
    train_x = CreateDataset.padDatasets(TRAIN_EMBEDDING_FILE, MAX_LENGTH, EMBEDDING_SIZE,
                                        TRAIN_PADDED_SEQUENCES_FILE)
    train_y, train_sequence_length = CreateDataset.singleTaskTrainingSet(
        train_sorted_labels, output_vocabulary, MAX_LENGTH)

    # Retrieve the development inputs and labels for the network
    dev_x = CreateDataset.padDatasets(DEV_EMBEDDING_FILE, MAX_LENGTH, EMBEDDING_SIZE,
                                      DEV_PADDED_SEQUENCES_FILE)
    dev_y, dev_sequence_length = CreateDataset.singleTaskTrainingSet(
        dev_sorted_labels, output_vocabulary, MAX_LENGTH)

    # Retrieve training and development domain and lexname labels for a multi-task architecture
    if not singleTaskLearning:
        train_domain_y, _ = CreateDataset.singleTaskTrainingSet(
            train_domain_labels, domain_output_vocabulary, MAX_LENGTH)
        train_lexname_y, _ = CreateDataset.singleTaskTrainingSet(
            train_lex_labels, lex_output_vocabulary, MAX_LENGTH)
        dev_domain_y, _ = CreateDataset.singleTaskTrainingSet(
            dev_domain_labels, domain_output_vocabulary, MAX_LENGTH)
        dev_lexname_y, _ = CreateDataset.singleTaskTrainingSet(
            dev_lex_labels, lex_output_vocabulary, MAX_LENGTH)

    print("Dimension of train_x: ", train_x.shape)
    print("Dimension of train_y: ", train_y.shape)
    print("Dimension of dev_x: ", dev_x.shape)
    print("Dimension of dev_y: ", dev_y.shape)
    print()

    # Neural network model definition
    OUTPUT_VOCABULARY_LENGTH = len(output_vocabulary)
    if not singleTaskLearning:
        DOMAIN_VOCABULARY_LENGTH = len(domain_output_vocabulary)
        LEXNAME_VOCABULARY_LENGTH = len(lex_output_vocabulary)

    tf.reset_default_graph()

    # Graph initialization
    g = tf.Graph()
    with g.as_default():
        if singleTaskLearning:
            print("Creating single-task learning architecture")
            inputs, labels, input_prob, output_prob, state_prob, sequence_length, loss, train_op, acc = BiLSTM.simpleBiLSTM(
                BATCH_SIZE, EMBEDDING_SIZE, HIDDEN_SIZE, OUTPUT_VOCABULARY_LENGTH)
        else:
            print("Creating multi-task learning architecture")
            inputs, sensekey_labels, domain_labels, lexname_labels, keep_prob, lambda_1, lambda_2, sequence_length, lr, sensekey_loss, domain_loss, lexname_loss, train_op, acc = multitaskBidirectionalModel(
                BATCH_SIZE, EMBEDDING_SIZE, HIDDEN_SIZE, MAX_LENGTH,
                OUTPUT_VOCABULARY_LENGTH, DOMAIN_VOCABULARY_LENGTH, LEXNAME_VOCABULARY_LENGTH)
        saver = tf.train.Saver()

    n_iterations = int(np.ceil(len(train_x) / BATCH_SIZE))
    n_dev_iterations = int(np.ceil(len(dev_x) / BATCH_SIZE))

    # MAIN TRAINING LOOP
    with tf.Session(graph=g) as sess:
        # Check for the presence of checkpoints in order to restore training
        if tf.train.latest_checkpoint(CHECKPOINT_PATH):
            print("Checkpoint present. Restoring model.")
            saver.restore(sess, tf.train.latest_checkpoint(CHECKPOINT_PATH))
        else:
            print("Model not present. Initializing variables.")
            sess.run(tf.local_variables_initializer())
            sess.run(tf.global_variables_initializer())

        train_writer = tf.summary.FileWriter(LOGGING_DIR, sess.graph)
        print("\nStarting training...")

        # Use try/except so that the model is saved when training is stopped
        # by a keyboard interrupt.
        try:
            for epoch in range(0, EPOCHS):
                if singleTaskLearning:
                    print("\nEpoch", epoch + 1)
                    epoch_loss, epoch_acc = 0., 0.
                    mb = 0
                    print("=======" * 10)
                    start = time.perf_counter()
                    for batch_x, batch_y, batch_seq_length in Utils.batch_generator(
                            train_x, train_y, train_sequence_length, BATCH_SIZE):
                        mb += 1
                        _, loss_val, acc_val = sess.run(
                            [train_op, loss, acc],
                            feed_dict={
                                inputs: batch_x,
                                labels: batch_y,
                                sequence_length: batch_seq_length,
                                input_prob: 0.5,
                                output_prob: 0.5,
                                state_prob: 1.0
                            })
                        epoch_loss += loss_val
                        epoch_acc += acc_val
                        print("{:.2f}%\tTrain Loss: {:.4f}\tTrain Accuracy: {:.4f} "
                              .format(100. * mb / n_iterations, epoch_loss / mb, epoch_acc / mb),
                              end="\r")
                    elapsed = time.perf_counter() - start
                    print('Elapsed %.3f seconds.' % elapsed)

                    epoch_loss /= n_iterations
                    epoch_acc /= n_iterations
                    Utils.add_summary(train_writer, "epoch_loss", epoch_loss, epoch)
                    Utils.add_summary(train_writer, "epoch_acc", epoch_acc, epoch)
                    print("\n")
                    print("Train Loss: {:.4f}\tTrain Accuracy: {:.4f}".format(epoch_loss, epoch_acc))
                    print("=======" * 10)

                    # DEV EVALUATION
                    dev_loss, dev_acc = 0.0, 0.0
                    for batch_x, batch_y, batch_seq_length in Utils.batch_generator(
                            dev_x, dev_y, dev_sequence_length, BATCH_SIZE):
                        loss_val, acc_val = sess.run(
                            [loss, acc],
                            feed_dict={
                                inputs: batch_x,
                                labels: batch_y,
                                sequence_length: batch_seq_length,
                                input_prob: 0.5,
                                output_prob: 0.5,
                                state_prob: 1.0
                            })
                        dev_loss += loss_val
                        dev_acc += acc_val
                    dev_loss /= n_dev_iterations
                    dev_acc /= n_dev_iterations
                    Utils.add_summary(train_writer, "epoch_val_loss", dev_loss, epoch)
                    Utils.add_summary(train_writer, "epoch_val_acc", dev_acc, epoch)
                    print("\nDev Loss: {:.4f}\tDev Accuracy: {:.4f}".format(dev_loss, dev_acc))

                    # Save checkpoints every two epochs
                    if epoch % 2 == 0:
                        save_path = saver.save(sess, CHECKPOINT_SAVE_FILE)
                else:
                    print("\nEpoch", epoch + 1)
                    epoch_sensekey_loss, epoch_domain_loss, epoch_lexname_loss, epoch_acc, epoch_f1 = 0., 0., 0., 0., 0.
                    mb = 0
                    print("=======" * 10)
                    start = time.perf_counter()
                    for batch_x, batch_y, batch_domain_y, batch_lexname_y, batch_seq_length in alternative_batch_generator(
                            train_x, train_y, train_domain_y, train_lexname_y,
                            train_sequence_length, BATCH_SIZE):
                        mb += 1
                        _, sensekey_loss_val, domain_loss_val, lexname_loss_val, acc_val = sess.run(
                            [train_op, sensekey_loss, domain_loss, lexname_loss, acc],
                            feed_dict={
                                sensekey_labels: batch_y,
                                domain_labels: batch_domain_y,
                                lexname_labels: batch_lexname_y,
                                lambda_1: 1.0,
                                lambda_2: 1.0,
                                keep_prob: 0.8,
                                inputs: batch_x,
                                sequence_length: batch_seq_length,
                                lr: learning_rate
                            })
                        epoch_sensekey_loss += sensekey_loss_val
                        epoch_domain_loss += domain_loss_val
                        epoch_lexname_loss += lexname_loss_val
                        epoch_acc += acc_val
                        print("{:.2f}%\tSensekey Train Loss: {:.4f}\tTrain Accuracy: {:.4f}"
                              .format(100. * mb / n_iterations, epoch_sensekey_loss / mb, epoch_acc / mb),
                              end="\r")
                    elapsed = time.perf_counter() - start
                    print('Elapsed %.3f seconds.' % elapsed)
                    print("{:.2f}%\tSensekey Train Loss: {:.4f}\tTrain Accuracy: {:.4f}"
                          .format(100. * mb / n_iterations, epoch_sensekey_loss / mb, epoch_acc / mb),
                          end="\r")

                    epoch_sensekey_loss /= n_iterations
                    epoch_domain_loss /= n_iterations
                    epoch_lexname_loss /= n_iterations
                    epoch_acc /= n_iterations
                    Utils.add_summary(train_writer, "epoch_sensekey_loss", epoch_sensekey_loss, epoch)
                    Utils.add_summary(train_writer, "epoch_domain_loss", epoch_domain_loss, epoch)
                    Utils.add_summary(train_writer, "epoch_lexname_loss", epoch_lexname_loss, epoch)
                    Utils.add_summary(train_writer, "epoch_acc", epoch_acc, epoch)
                    print("\n")
                    print()
                    print("Train Sensekey Loss: {:.4f}".format(epoch_sensekey_loss))
                    print("Train Domain Loss: {:.4f}".format(epoch_domain_loss))
                    print("Train Lexname Loss: {:.4f}".format(epoch_lexname_loss))
                    print("=======" * 10)

                    # DEV EVALUATION
                    dev_loss, dev_acc, dev_f1 = 0.0, 0.0, 0.0
                    for batch_x, batch_y, batch_domain_y, batch_lexname_y, batch_seq_length in alternative_batch_generator(
                            dev_x, dev_y, dev_domain_y, dev_lexname_y,
                            dev_sequence_length, BATCH_SIZE):
                        loss_val, acc_val = sess.run(
                            [sensekey_loss, acc],
                            feed_dict={
                                sensekey_labels: batch_y,
                                domain_labels: batch_domain_y,
                                lexname_labels: batch_lexname_y,
                                lambda_1: 1.0,
                                lambda_2: 1.0,
                                keep_prob: 0.8,
                                inputs: batch_x,
                                sequence_length: batch_seq_length,
                                lr: learning_rate
                            })
                        dev_loss += loss_val
                        dev_acc += acc_val
                    dev_loss /= n_dev_iterations
                    dev_acc /= n_dev_iterations
                    Utils.add_summary(train_writer, "epoch_val_loss", dev_loss, epoch)
                    Utils.add_summary(train_writer, "epoch_val_acc", dev_acc, epoch)
                    print("\nDev Loss: {:.4f}\tDev Accuracy: {:.4f}".format(dev_loss, dev_acc))
                    print()
                    if epoch % 2 == 0:
                        save_path = saver.save(sess, CHECKPOINT_SAVE_FILE)
        except KeyboardInterrupt:
            print("Keyboard interruption. Saving")
            save_path = saver.save(sess, COMPLETE_MODEL_FILE)
            train_writer.close()

        save_path = saver.save(sess, COMPLETE_MODEL_FILE)
        train_writer.close()
import tetra_dude as td
import matplotlib.pyplot as plt
import datetime
from operator import itemgetter, attrgetter, methodcaller
from numpy import *
import keras
from keras.preprocessing import sequence
from keras.models import Sequential, Graph
from keras.layers.core import Dense, Dropout, Activation, Flatten
from keras.optimizers import SGD, RMSprop, Adagrad, Adam, Adadelta
from keras.utils import np_utils

new = ps.sam_info('./', 1211177, 92, 100)

# For all lines, num_line(last) = 0

# ATGC
PI = array([[0.996975631,    0.000512946175, 0.00151638794,  0.000995034679],
            [0.000416377636, 0.997385479,    0.000858583848, 0.00133955915],
            [0.000865811030, 0.000725518163, 0.997926104,    0.000482566345],
            [0.000634619373, 0.000847625845, 0.000434391514, 0.998083363]])

'''
# ACGT
PI = array([[0.996975631,    0.000995034679, 0.00151638794,  0.000512946175],
            [0.000634619373, 0.998083363,    0.000434391514, 0.000847625845],
            [0.000865811030, 0.000482566345, 0.997926104,    0.000725518163],
            [0.000416377636, 0.00133955915,  0.000858583848, 0.997385479]])
'''

with open("fold1/Illumina_LinErr_100_fold1_test1.fasta", "w") as f:
    for i in range(len(new)):
right = 0
wrong = 0
counterrrr = 0
wrongNeg = 0
wrongPos = 0
wrongNeu = 0
totalPos = 0
totalNeg = 0
totalNeu = 0

for c in content:
    counterrrr += 1
    split_c = c.split("@", 1)
    class_tags.append(split_c[1])
    tempTags = parser.parse_sentence(split_c[0])
    #print("senetence:", split_c[0])
    #print("tags:", tempTags)
    #print("Actual Classification:", split_c[1])
    cs = classify_sentence(split_c[0])
    if "negative" in split_c[1]:
        totalNeg += 1
    elif "positive" in split_c[1]:
        totalPos += 1
    elif "neutral" in split_c[1]:
        totalNeu += 1
    else:
        print("goofed", split_c[1])
    #print("My Classification:", cs)
    if cs in split_c[1]:
import FileNameReading, Parsing, Structure, Functions, Clean
import matplotlib.pyplot as plt
import datetime
import numpy as np

# dictionary that contains all the filenames
filenames = FileNameReading.get_file_names()
all_sensors = []

for i in filenames.keys():
    current_sensor = []
    data = Parsing.parse(i)
    print("Current file being read is " + i)
    data = Clean.remove_empty(data)
    for row in data:
        for k, v in row.items():
            if k == "Timestamp":
                line = row[k].split(' ')
                second_value = line[1].split('A') or line[1].split('P')
                row[k] = ((line[0]), (second_value[0]))
                # row[k] = (v, str(v))
        current_sensor.append(row)
        # datetime.datetime.strptime()
    all_sensors.append(current_sensor)

# print(all_sensors)
x = []
import Calculating
import Parsing

if __name__ == '__main__':
    # Target path
    path = input("Enter the location of the \"武汉大学教务系统_files\" folder: ")
    '''path = '.\\source'''
    # Parse the web pages and extract the score table
    score_table = Parsing.get_table(path)
    # Compute the average GPA
    GPA_table = Calculating.calculate(score_table)
    print(GPA_table)
    input()
def virtual_server(sentence):
    # Simulates the behaviour on the server side
    # --------------------------------------------
    # Collect the required data
    # --------------------------------------------
    res_file = Parsing.parsing(sentence)  # Run the document analysis
    dic_file = open(os.path.dirname(os.path.abspath(__file__)) + '/e-words2.txt')  # Load the matching dictionary file
    dic_data = dic_file.read()  # Raw, unsplit dictionary data
    # --------------------------------------------
    # Assemble the analysis results
    # --------------------------------------------
    tr = load_tree(res_file)
    # --------------------------------------------
    # Analyze the document
    # --------------------------------------------
    Analyzed_result = Analize.analyze(tr)
    # --------------------------------------------
    # Extract only the nouns from the assembled result
    # --------------------------------------------
    nouns = extract_nouns(tr)
    #print("nouns:", nouns)
    #print()
    #print()
    # --------------------------------------------
    # Match against the IT glossary
    # --------------------------------------------
    detection = matching(nouns, dic_data)  # Run the matching function
    #print("detection:", detection)
    #print()
    #print()
    # --------------------------------------------
    # Preprocessing
    # --------------------------------------------
    mark_word = Make_mark_word(detection)
    #print("markword:", mark_word)
    #print()
    #print()
    # --------------------------------------------
    # Wrap the detected parts in {}
    # --------------------------------------------
    result_sentence = Mark(mark_word, sentence)  # Mark the matched words with {}
    #print("result:", result_sentence)
    # --------------------------------------------
    # Proofread the document
    # --------------------------------------------
    result_sentence = Proofreading.proofreading(result_sentence)
    #print("result:", result_sentence)
    # --------------------------------------------
    # Cleanup
    # --------------------------------------------
    dic_file.close()  # Close the file
    #print(result_sentence, Analyzed_result)

    # First return value: the sentence with the difficult words marked (list).
    # Second return value: the summarized key points (string).
    return mark_word, Analyzed_result
from Connect import *
from ElementControl import *
from Parsing import *
from DBConnect import *

if __name__ == "__main__":
    # List of hashtag URLs
    collect = []

    # Object that works with the restaurant-list web resources
    e = Toplist()
    # Web page parsing object
    p = Parsing()

    # List of collected hashtags
    hashTag = p.collectHashTag()

    # Click each hashtag and collect the URLs
    for i in hashTag:
        e.tagClick(i)
        e.more()
        collect.append(p.getLink())

    # Classify the URLs by category into a dictionary
    category = dict()
    for index in range(0, len(hashTag)):
        category[hashTag[index]] = collect[index]
import sys
import os
import Parsing
import Common

# Parse the arguments and fill into search_paramaters
search_paramaters = Common.SearchParamaters()
search_paramaters = Parsing.parse_labels(sys.argv)

if len(search_paramaters.search_phrases) == 0:
    print("ERROR: Must include phrase to search for at the end of the run command")
    sys.exit()

ip_folders = list()
ip_file = open(search_paramaters.ips_filename, 'r')
for line in ip_file:
    ip_folders.append(line.strip())

occurence_count = 0
for first_two_quadrants in ip_folders:
    for third_quadrant in range(0, 255):
        for fourth_quadrant in range(0, 255):
            path_to_file = first_two_quadrants + '/' + first_two_quadrants + "." + str(third_quadrant) + "." + str(fourth_quadrant)
            if os.path.exists(path_to_file):
                enc = 'utf-8'
                webpage = open(path_to_file, 'r', encoding=enc)
                try:
                    found = False
                    for line in webpage:
from collections import defaultdict
import os

# local files
import Parsing

#rules = []
#statement = []

fileName = "example.txt"
filepath = os.getcwd() + "\\" + fileName
file = open(filepath, 'r')

# splitting the file into a list of lines
lines = file.read().splitlines()

# parsing the lines and extracting the rules and statements from them
lines, statements, rules = Parsing.parseLines(lines)

# creating a dictionary with all fuzzy sets
fuzzyDictionary = Parsing.generateDict(lines)


def hasNumbers(inputString):
    return any(char.isdigit() for char in inputString)


# access the fuzzy tuples in the dictionary by providing the names of the set and subset
def getSet(upperName, name):
    tempList = fuzzyDictionary[upperName]
    output = []
    for dic in tempList:
        if name in dic:
            # dict.values() returns a view obj so we need to cast it
gs1 = plt.subplot2grid((2, 1), (0, 0))
nutzergraph = False
f, (a0, a1) = plt.subplots(2, 1, gridspec_kw={'height_ratios': [3, 1]})
# ISSUE: "_" is currently not excluded like "." is; no known occurrences, possibly outdated.
# This program creates an image that visualizes a given log file in relation to a given quota.
plt.rcParams['figure.figsize'] = [6, 4]  # set global parameters, plotter initialisation

# translate_date_to_sec receives a date and returns the date in unix-seconds, if it's a valid date.
fmt = "%Y-%m-%d-%H-%M"  # standard format for dates: year, month, day, hour, minute
quotaexists = 0
number_id = 0

# Formats the date into month and year.
myFmt = mdates.DateFormatter('%b %y')
nothing = mdates.DateFormatter(' ')

ap = argparse.ArgumentParser()
# Reads parameter inputs.
Parsing.argparsinit(ap, sys.argv)
originals = Parsing.get_original()
partial_quota = Parsing.get_partial_quota()
yearly_quota = Parsing.get_yearly_quota()
start_point = Parsing.get_start_point()
filter_n = Parsing.get_filter()
nutzergraph = Parsing.get_nutzer_graph()
datum = Parsing.get_datum()
number_of_months_DB = Parsing.get_number_of_months()
target = Parsing.get_target()
Parameternummer = Parsing.get_parameter_nr()
#print("PARANR", Parameternummer)

###### SQL connection to projectrequest database #####
if Parameternummer:
    # tries obtaining quota and startdate from the project database
    user = getpass.getuser()
        outputcode = context.code()
        outfile.write(outputcode)
    else:
        option_parser.print_help()


# -------------------------------------------------------#
# global parser stuff, needs to be here
# -------------------------------------------------------#

# Introspect this module to generate a parser. Enable all the bells and
# whistles.
spec = Parsing.Spec(sys.modules[__name__],
                    pickleFile="codegen.pickle",
                    skinny=False,
                    logFile="codegen.log",
                    graphFile="codegen.dot",
                    verbose=True)

# Create a parser that uses the parser tables encapsulated by spec. In this
# program, we are only creating one parser instance, but it is possible for
# multiple parsers to use the same Spec simultaneously.
parser = Parser(spec)

#
# Global variables to collect needed information during parsing.
#
verbose = False         # Switch on verbose output?
debug = False           # Switch on debug output?
includes_table = {}     # Maps sorts to include files
def get_diet():
    global isTomorrow
    meal = get_meal()
    diet = Parsing.dietExtract(meal, isTomorrow)
    return diet
import numpy as np
import Parsing
import matplotlib.pyplot as plt
from scipy.spatial import Voronoi, voronoi_plot_2d, Delaunay
from scipy.spatial import KDTree, tsearch

data = Parsing.parse("individual_sensors_data.csv")
# print(data)

long = []
lat = []
cur = []
for y in data:
    for k, v in y.items():
        if v == "I35 N":
            cur.append(y)

for x in cur:
    for k, v in x.items():
        if k == "Longitude":
            long.append(float(v))
        elif k == "Latitude":
            lat.append(float(v))

# print(lat)
# print(long)
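# A minimal follow-on sketch (an assumption: the Voronoi imports above suggest
# the intent is to tessellate the collected sensor coordinates); nothing here
# comes from the original script beyond the `long`/`lat` lists.
points = np.column_stack((long, lat))
vor = Voronoi(points)
voronoi_plot_2d(vor)
plt.show()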