def split_plus_test(): separator_array_ = ['ab', ', ', ' ', 'e', '中文', ':', '|'] str_ = 'abc,de fgh, ijk,lmn:op|qr中文st' separator_array_ = TypeUtils.convert_to_type(separator_array_, unicode) str_ = TypeUtils.convert_to_type(str_, unicode) print StringUtils.split_plus(str_, separator_array_)
def build_meth_vocab(vocab_file, meth_file): vocab_file.seek(0) meth_file.seek(0) vocab = {} last_func = "" for line in vocab_file: if not line[0] == '\t': last_func = line.split()[0] vocab[last_func] = {} else: n = len(line.split()) vocab[last_func][n] = {} for i in range(n): vocab[last_func][n][i-1] = {} for line in meth_file: parts = line.split('#') if len(parts) > 1: call = t.splitLit(parts[0]) func = call[0] args = call[1:] print call n = len(args) for i in range(n): if not args[i] in vocab[func][n][i-1]: vocab[func][n][i-1][args[i]] = 0 vocab[func][n][i-1][args[i]] += 1 return vocab
def convert_to_type_test(): print TypeUtils.convert_to_type(['杭州西湖', ['北京']], unicode) print TypeUtils.convert_to_type(TypeUtils.convert_to_type(['杭州西湖', '123', 123], int), unicode) print TypeUtils.convert_to_type([['123', '中文'], 123], float)
def getVarSents2(sents):
    """For each variable of each sentence, collect the statements that
    mention it as (func, arity, appearance-offset-tuple) triples.

    Offsets are token index minus 3 — presumably skipping a fixed
    statement header; TODO confirm against t.getSig's layout.
    """
    vsents = []
    for sent, v_list in sents:
        for var in v_list:
            sen = []
            for stat, ctx in sent:
                offsets = [pos - 3 for pos, tok in enumerate(stat)
                           if tok == var[2]]
                if offsets:
                    sig = t.getSig(stat, v_list)
                    sen.append((sig[0], len(sig) - 1, tuple(offsets)))
            vsents.append(sen)
    return vsents
def getVarSents(sents):
    """For each variable of each sentence, emit the statements mentioning
    it as (signature + '|' + appearance-offsets, context) pairs.

    Offsets are token index minus 3 — presumably skipping a fixed
    statement header; TODO confirm against t.getSig's layout.
    """
    vsents = []
    for sent, v_list in sents:
        for var in v_list:
            sen = []
            for stat, ctx in sent:
                offsets = [pos - 3 for pos, tok in enumerate(stat)
                           if tok == var[2]]
                if offsets:
                    sig = t.getSig(stat, v_list)
                    sig.append('|')
                    sig.extend(offsets)
                    sen.append((sig, ctx))
            vsents.append(sen)
    return vsents
def getVarSents(sents):
    """Per-variable sentence extraction: (signature + "|" + offsets, ctx).

    NOTE(review): duplicate definition — a behaviorally identical
    getVarSents appears earlier in this file; this later one shadows it
    at import time.  Consider deleting one of them.
    """
    vsents = []
    for sent, v_list in sents:
        for var in v_list:
            per_var = []
            for stat, ctx in sent:
                hits = []
                idx = 0
                while idx < len(stat):
                    if stat[idx] == var[2]:
                        hits.append(idx - 3)
                    idx += 1
                if len(hits) > 0:
                    sig = t.getSig(stat, v_list)
                    sig.append("|")
                    sig.extend(hits)
                    per_var.append((sig, ctx))
            vsents.append(per_var)
    return vsents
def getVarLines(var_file):
    """Parse the variable-sentences file back into sentences.

    Data lines ("call | ints") become (func, arity, int-tuple) triples;
    marker lines (starting with '<', e.g. <S1>/<S2>/<END>) are kept
    verbatim minus the trailing newline, and <END> closes a sentence.
    """
    var_file.seek(0)
    sents = []
    sent = []
    for line in var_file:
        if line[0] != '<':
            call_text, info_text = line.split('|')[0], line.split('|')[1]
            info = tuple(map(int, info_text.split()))
            call = t.splitLit(call_text)
            sent.append((call[0], len(call) - 1, info))
        else:
            sent.append(line[:-1])
            if line == "<END>\n":
                sents.append(sent)
                sent = []
    return sents
def getReducedLines(meth_file):
    """Parse the method-sentences file into sentences of (stat, info) pairs.

    Data lines ("call # info") become ((func, arity), info-tuple);
    marker lines (starting with '<') are kept verbatim minus the newline
    with an empty info tuple, and <END> closes a sentence.
    """
    meth_file.seek(0)
    sents = []
    sent = []
    for line in meth_file:
        if line[0] != '<':
            call_text, info_text = line.split('#')[0], line.split('#')[1]
            info = tuple(info_text.split())
            call = t.splitLit(call_text)
            sent.append(((call[0], len(call) - 1), info))
        else:
            sent.append((line[:-1], ()))
            if line == "<END>\n":
                sents.append(sent)
                sent = []
    return sents
def format_station(station):
    """
    purpose:
        format a measure as a station value with hundreds offset
        0+00 notation

    arguments:
        station: number
            station measurement (must be non-negative)

    return value:
        string
            measurement formatted as 0+00
        None if error, non-numeric, or negative input
    """
    try:
        if not TypeUtils.is_numeric(station):
            return None
        # fix: the original blindly formatted negatives through the
        # "< 10" branch, producing garbage like "0+0-5"
        if station < 0:
            return None
        station_str = str(station)
        if 10 <= station < 100:
            return "0+{0}".format(station_str)
        if station < 10:
            return "0+0{0}".format(station_str)
        # >= 100: insert '+' before the last two whole-number digits,
        # preserving any fractional part (split(".") hoisted to one call)
        whole, dot, frac = station_str.partition(".")
        if dot:
            return "{0}+{1}.{2}".format(whole[:-2], whole[-2:], frac)
        return "{0}+{1}".format(whole[:-2], whole[-2:])
    except Exception:
        # best-effort formatter: any unexpected failure yields None
        return None
def main(): par = plyj.parser.Parser() modes = ["cfs", "levels"] if len(sys.argv) > 1: mode = sys.argv[1] else: mode = "levels" if mode not in modes: mode = "levels" corpus_path = "../Java/Corpus/" data_path = "../Data/Raw" #### meth_name = "method_sentences_" + mode + ".txt" var_name = "variable_sentences_" + mode + ".txt" vocab_name = "vocab_" + mode + ".txt" #### meth_file = open(os.path.join(data_path, meth_name), 'w') var_file = open(os.path.join(data_path, var_name), 'w') vocab_file = open(os.path.join(data_path, vocab_name), 'w') #### vocab = {} sf = [] fields = [] ctr = 1 blacklist = ["5a8beeae20366b5094d0db8148e0563", "3cd87ee90872cfcb72b3cb3b773d8efa"] for subdir, dirs, files in os.walk(corpus_path): for f in files: clear = True for h in blacklist: if h in f: clear = False if f.endswith(".java") and clear: p = os.path.join(subdir, f) cus = e.ExtractCode(par, p) for i, cu in cus: sf2, fi, sents = seq.getSents(cu, i, mode) sf.extend(sf2) fields.extend(fi) print str(ctr) + ": " + str(len(sents)) ctr += 1 for sent, vl in sents: meth_file.write("<S2>\n") meth_file.write("<S1>\n") for stat, ctx in sent: meth_file.write(e.nstr(t.getSig(stat, vl, False)) + ' # ' + e.nstr(ctx) + '\n') s = t.getSig(stat, vl) if not s[0] in vocab: vocab[s[0]] = [] vocab[s[0]].append(s[1:]) meth_file.write('<END>\n') vsents = seq.getVarSents(sents) for vsent in vsents: var_file.write("<S2>\n") var_file.write("<S1>\n") for stat, ctx in vsent: var_file.write(e.nstr(stat) + '\n') var_file.write('<END>\n') #break for s in vocab: vocab_file.write(s + '\n') for sig in t.resolveSigs(vocab[s]): vocab_file.write('\t' + e.nstr(sig) + '\n') meth_file.close() var_file.close() vocab_file.close()
def main(): #par = plyj.parser.Parser() modes = ["cfs", "levels"] if len(sys.argv) > 1: mode = sys.argv[1] else: mode = "levels" if mode not in modes: mode = "levels" data_path = "../Data/Raw" new_path = "../Data/Revised" #### meth_name = "method_sentences_" + mode + ".txt" var_name = "variable_sentences_" + mode + ".txt" vocab_name = "vocab_" + mode + ".txt" counts_name = "counts_" + mode + ".txt" memm_name = "memm_" + mode + ".txt" #### meth_file = open(os.path.join(data_path, meth_name), 'r') var_file = open(os.path.join(data_path, var_name), 'r') vocab_file = open(os.path.join(data_path, vocab_name), 'r') nvocab_file = open(os.path.join(new_path, vocab_name), 'wb') count_file = open(os.path.join(new_path, counts_name), 'wb') memm_file = open(os.path.join(new_path, memm_name), 'wb') #### meth_sigs = build_meth_vocab(vocab_file, meth_file) meth_vocab_list = {} ctr = 0 for f in meth_sigs: print f for n in meth_sigs[f]: if not (f,n) in meth_vocab_list: meth_vocab_list[(f, n)] = ctr ctr += 1 print '\t' + str(n) + " | ", for i in range(n): print str(i-1) + ":( ", for ty in meth_sigs[f][n][i-1]: print ty + '/' + str(meth_sigs[f][n][i-1][ty]) + ' ', print ") ", print meth_vocab_list["<END>"] = ctr meth_vocab_list["<S1>"] = ctr+1 meth_vocab_list["<S2>"] = ctr+2 pot_var_vocab_list = {} ctr = 0 for k in meth_vocab_list: if type(k) is not str: f, n = k for s in t.powerset([i-1 for i in range(n)]): pot_var_vocab_list[(f, n, tuple(s))] = ctr ctr += 1 pot_var_vocab_list["<END>"] = ctr pot_var_vocab_list["<S1>"] = ctr+1 pot_var_vocab_list["<S2>"] = ctr+2 vsents = getVarLines(var_file) act_var_vocab_list = {} ctr = 0 for s in vsents: for stat in s: if not stat in act_var_vocab_list: act_var_vocab_list[stat] = ctr ctr += 1 pickle.dump((meth_sigs, meth_vocab_list, pot_var_vocab_list, act_var_vocab_list), nvocab_file) nvocab_file.close() print len(meth_vocab_list) print len(pot_var_vocab_list) print len(act_var_vocab_list) meth_sents = getReducedLines(meth_file) meth_sents = 
seq.getFeatures(meth_sents) X = [meth_sents[i][j][1] for i in range(len(meth_sents)) for j in range(len(meth_sents[i]))] print len(X) y = [meth_vocab_list[meth_sents[i][j][0]] for i in range(len(meth_sents)) for j in range(len(meth_sents[i]))] meth_ngram, meth_N1p, meth_ch = getNTuples(meth_sents, meth_vocab_list, "meth") print "N-GRAMS" pot_var_ngram, pot_var_N1p, pot_var_ch = getNTuples(vsents, pot_var_vocab_list, "var") print "N-GRAMS" act_var_ngram, act_var_N1p, act_var_ch = getNTuples(vsents, act_var_vocab_list, "var") print "N-GRAMS" pickle.dump(((meth_ngram, meth_N1p, meth_ch), (pot_var_ngram, pot_var_N1p, pot_var_ch), (act_var_ngram, act_var_N1p, act_var_ch)), count_file) count_file.close() MEMM = linear_model.LogisticRegression() if not mode == "cfs": MEMM.fit(X,y) pickle.dump(MEMM, memm_file) meth_file.close() var_file.close() memm_file.close()
def main(): par = plyj.parser.Parser() modes = ["cfs", "levels"] if len(sys.argv) > 1: mode = sys.argv[1] else: mode = "levels" if mode not in modes: mode = "levels" corpus_path = "../Java/Corpus/" data_path = "../Data/Raw" #### meth_name = "method_sentences_" + mode + ".txt" var_name = "variable_sentences_" + mode + ".txt" vocab_name = "vocab_" + mode + ".txt" #### meth_file = open(os.path.join(data_path, meth_name), 'w') var_file = open(os.path.join(data_path, var_name), 'w') vocab_file = open(os.path.join(data_path, vocab_name), 'w') #### vocab = {} sf = [] fields = [] ctr = 1 blacklist = [ "5a8beeae20366b5094d0db8148e0563", "3cd87ee90872cfcb72b3cb3b773d8efa" ] for subdir, dirs, files in os.walk(corpus_path): for f in files: clear = True for h in blacklist: if h in f: clear = False if f.endswith(".java") and clear: p = os.path.join(subdir, f) cus = e.ExtractCode(par, p) for i, cu in cus: sf2, fi, sents = seq.getSents(cu, i, mode) sf.extend(sf2) fields.extend(fi) print str(ctr) + ": " + str(len(sents)) ctr += 1 for sent, vl in sents: meth_file.write("<S2>\n") meth_file.write("<S1>\n") for stat, ctx in sent: meth_file.write( e.nstr(t.getSig(stat, vl, False)) + ' # ' + e.nstr(ctx) + '\n') s = t.getSig(stat, vl) if not s[0] in vocab: vocab[s[0]] = [] vocab[s[0]].append(s[1:]) meth_file.write('<END>\n') vsents = seq.getVarSents(sents) for vsent in vsents: var_file.write("<S2>\n") var_file.write("<S1>\n") for stat, ctx in vsent: var_file.write(e.nstr(stat) + '\n') var_file.write('<END>\n') #break for s in vocab: vocab_file.write(s + '\n') for sig in t.resolveSigs(vocab[s]): vocab_file.write('\t' + e.nstr(sig) + '\n') meth_file.close() var_file.close() vocab_file.close()
def calc_coords(starting_point, distance, h_angle, v_angle):
    """
    purpose:
        Calculate coordinates given a starting point, distance,
        horizontal angle, and vertical angle

    arguments:
        starting_point: tuple of float
            (x, y, z) coordinates of the starting point
        distance: float
            3D distance to the new point
        h_angle: float
            Horizontal angle, counter clockwise from east
            0 <= h_angle < 360
        v_angle: float
            Vertical angle, 180 degrees from zenith to nadir
            Horizon is 90
            0 <= v_angle <= 180

    return value:
        dictionary
            success: boolean
            coords: tuple
                (x, y, z) coordinates of the new point
            messages: list
    """
    ret_dict = {"messages": [], "coords": ()}
    try:
        # validate starting_point
        if len(starting_point) != 3:
            ret_dict["messages"].append("Error in calc_coords: starting_point does not have 3 items")
            ret_dict["success"] = False
            return ret_dict
        # validate distance
        if not TypeUtils.is_numeric(distance):
            ret_dict["messages"].append("Error in calc_coords: distance must be a number")
            ret_dict["success"] = False
            return ret_dict
        if distance <= 0:
            ret_dict["messages"].append("Error in calc_coords: distance must be > 0")
            ret_dict["success"] = False
            return ret_dict
        # validate h_angle
        # fix: original tested is_numeric(distance) here (copy-paste bug)
        if not TypeUtils.is_numeric(h_angle):
            ret_dict["messages"].append("Error in calc_coords: horizontal angle must be a number")
            ret_dict["success"] = False
            return ret_dict
        if not 0 <= h_angle < 360:
            ret_dict["messages"].append("Error in calc_coords: horizontal angle must be between 0 and 360")
            ret_dict["success"] = False
            return ret_dict
        # validate v_angle
        # fix: original tested is_numeric(distance) and range-checked
        # h_angle here (copy-paste bugs)
        if not TypeUtils.is_numeric(v_angle):
            ret_dict["messages"].append("Error in calc_coords: vertical angle must be a number")
            ret_dict["success"] = False
            return ret_dict
        if not 0 <= v_angle <= 180:
            ret_dict["messages"].append("Error in calc_coords: vertical angle must be between 0 and 180")
            ret_dict["success"] = False
            return ret_dict
        # get the starting coordinates
        x1, y1, z1 = starting_point
        # compute the new coordinates (spherical -> cartesian offsets)
        x2 = x1 + (distance * math.sin(math.radians(v_angle)) *
                   math.cos(math.radians(h_angle)))
        y2 = y1 + (distance * math.sin(math.radians(v_angle)) *
                   math.sin(math.radians(h_angle)))
        z2 = z1 + (distance * math.cos(math.radians(v_angle)))
        ret_dict["coords"] = (x2, y2, z2)
        ret_dict["success"] = True
    except Exception as e:
        # len() on a non-sized starting_point, etc., lands here
        ret_dict["messages"].append("Error: {0}".format(str(e)))
        ret_dict["success"] = False
    # fix: the original's bare `return`s only produced ret_dict via a
    # `finally: return ret_dict`; returns are now explicit everywhere
    return ret_dict