def r_findtext(arglist):
    """Return the text of every ``<p>`` element on a page that matches a keyword expression.

    ``arglist`` has the shape ``['in', url, *expression]``.  The URL gets an
    ``http://`` prefix when it lacks a scheme or network location.  Each
    remaining entry is handled as follows:

    * list entries are skipped;
    * entries found in ``LOGICALS`` are copied into the boolean expression;
    * entries found in ``IGNORE`` are dropped;
    * any other entry is a keyword, replaced by ``True``/``False`` depending
      on whether it occurs in the paragraph text.

    A paragraph is kept when the resulting expression evaluates truthy, or
    when the expression is empty (no filter at all).

    Returns:
        list of paragraph strings on success, ``False`` on a syntax error,
        ``None`` when fetching/parsing/evaluating fails (the failure is
        reported through ``tgl.returnError``).
    """
    # The length guard turns a too-short argument list into the syntax error
    # instead of an IndexError.
    if len(arglist) < 2 or arglist[0] != 'in':
        tgl.returnError("Function Error", "FindText function illegal syntax. Correct syntax should be findText in url with [keyword expression]", False)
        return False

    link = arglist[1]
    parts = urlparse.urlsplit(link)
    if not parts.scheme or not parts.netloc:
        link = "http://" + link

    try:
        soup = BeautifulSoup(urlopen(link))
        keyparas = []
        for tag in soup.find_all('p'):
            para = tag.get_text()
            tokens = []
            for entry in arglist[2:]:
                # isinstance() replaces the "<type 'list'>" string comparison,
                # which only ever matched on Python 2.
                if isinstance(entry, list):
                    continue
                if entry in LOGICALS:
                    tokens.append(entry)
                elif entry in IGNORE:
                    continue
                else:
                    tokens.append("True" if entry in para else "False")
            # An empty expression means "no filter"; evaluating "" would raise
            # SyntaxError and be misreported as a link failure below.
            # NOTE(review): eval() only ever sees LOGICALS entries plus the
            # literals True/False, so it is safe only while LOGICALS contains
            # nothing but boolean operators/parentheses -- confirm.
            if not tokens or eval(" ".join(tokens)):
                keyparas.append(para)
        return keyparas
    except Exception:
        # Deliberately broad (the original used a bare except) so any fetch,
        # parse or evaluation failure is reported rather than raised, but
        # KeyboardInterrupt/SystemExit are no longer swallowed.
        tgl.returnError("Run Time Error", "Can\'t open the link: " + link + " in findtext", False)
        return None
def r_insert(arglist):
    """Append ``arglist[0]`` to the list ``arglist[2]`` and return that list.

    ``arglist`` must have exactly three entries (item, connective word,
    target list); the middle entry is not inspected.

    Returns:
        the target list itself (mutated in place) on success, ``0`` when the
        argument count is wrong, ``None`` when the target cannot be appended
        to (the failure is reported through ``tgl.returnError``).
    """
    if len(arglist) != 3:
        tgl.returnError("Run Time Error", "Insert function invalid. Correct syntax should be insert \"url/text\" into \"urllist/textlist\"", True)
        return 0

    item, target = arglist[0], arglist[2]
    try:
        target.append(item)
    except Exception:
        # %-formatting instead of "+" so that a non-string item or target
        # cannot make the error report itself raise TypeError.
        tgl.returnError("Run Time Error", "Cannot append %s to %s" % (item, target), True)
        return None
    return target
def r_read(arglist):
    """Read the text file named by ``arglist[0]`` and return its lines.

    Each line has its trailing newline characters removed.  When the file
    cannot be opened or read, the problem is reported through
    ``tgl.returnError`` and nothing is returned explicitly.
    """
    path = arglist[0]
    try:
        with open(path, "r") as handle:
            return [line.rstrip('\n') for line in handle]
    except IOError:
        tgl.returnError("Run Time Error", "Can\'t find the txt file or read data from the txt file", True)
def r_save(arglist):
    """Write ``arglist[0]`` to the file ``arglist[2]``, one item per line.

    ``arglist`` has the shape ``[data, 'into', filename]``.  ``data`` is
    either a single string (written as one line) or an iterable of strings.
    The file is opened in ``'w'`` mode, so existing content is replaced.

    Returns:
        ``True`` on success, ``False`` on a syntax error, ``None`` when
        writing fails (the failure is reported through ``tgl.returnError``).
    """
    # The length guard reports a too-short argument list as a syntax error
    # instead of raising IndexError.
    if len(arglist) < 3 or arglist[1] != 'into':
        tgl.returnError("Function Error", "Save function illegal syntax. Correct syntax should be save list of urls (or variable that contains a list of urls) into filename", True)
        return False

    data, filename = arglist[0], arglist[2]
    # isinstance() replaces the "<type 'str'>" string comparison, which never
    # matches on Python 3 and would write a string one character per line.
    items = [data] if isinstance(data, str) else data
    try:
        # "with" guarantees the file is flushed and closed; the original left
        # the handle open.
        with open(filename, 'w') as outfile:
            for item in items:
                outfile.write(item + "\n")
    except Exception:
        # %-formatting so that list data cannot make the error report itself
        # raise TypeError.
        tgl.returnError("Run Time Error", "Cannot save %s into %s" % (data, filename), True)
        return None
    return True
def r_append(arglist):
    """Append ``arglist[0]`` to the file ``arglist[2]``.

    ``arglist`` has the shape ``[data, 'into', filename]``.  ``data`` is
    either a single string or a list of strings; every item is written
    preceded by a newline.  An empty list leaves the file untouched (it is
    not created).

    Returns:
        ``True`` on success, ``False`` on a syntax error, ``None`` when
        writing fails (the failure is reported through ``tgl.returnError``).
    """
    # The length guard reports a too-short argument list as a syntax error
    # instead of raising IndexError.
    if len(arglist) < 3 or arglist[1] != 'into':
        tgl.returnError("Function Error", "Append function illegal syntax. Correct syntax should be append url (or variable containing a url) into filename", False)
        return False

    data, filename = arglist[0], arglist[2]
    # isinstance() replaces the "<type 'list'>" string comparison, which never
    # matches on Python 3.
    items = data if isinstance(data, list) else [data]
    try:
        if items:
            # Open once for the whole batch instead of once per item.
            with open(filename, "a") as modifiedfile:
                for item in items:
                    modifiedfile.write("\n" + item)
    except Exception:
        # %-formatting so that list data cannot make the error report itself
        # raise TypeError.
        tgl.returnError("Run Time Error", "Cannot append %s to %s" % (data, filename), True)
        return None
    return True
def filterindentation(self, inputline):
    """Strip leading tabs from ``inputline`` and update the indentation state.

    The number of leading tab characters is the line's indent level.  The
    method then:

    * truncates ``tgl.varlist`` to ``indentlevel + 1`` entries when the line
      is indented less than the previous one;
    * bumps the level by one and sets ``tgl.indentback`` to ``-1`` when the
      stripped line is longer than six characters and starts with
      ``define``, ``for`` or ``if`` (otherwise ``tgl.indentback`` is ``0``);
    * stores the level in ``tgl.indentlevel``;
    * appends a copy of the previous level's ``tgl.varlist`` entry when the
      level grew by exactly one, or reports a syntax error when it grew by
      more than one;
    * remembers the level in ``self.lastindentlevel``.

    Returns:
        ``inputline`` without its leading tabs.
    """
    tgl.indentback = 0

    # lstrip() instead of peeling inputline[0] in a loop: the loop raised
    # IndexError on an empty line or a line consisting only of tabs.
    stripped = inputline.lstrip('\t')
    indentlevel = len(inputline) - len(stripped)
    inputline = stripped

    if indentlevel < self.lastindentlevel:
        tgl.varlist = tgl.varlist[0:indentlevel + 1]

    # The "> 6" length requirement applies to all three keywords, exactly as
    # in the original condition.
    if len(inputline) > 6 and inputline.startswith(('define', 'for', 'if')):
        indentlevel += 1
        tgl.indentback = -1

    tgl.indentlevel = indentlevel

    if indentlevel == self.lastindentlevel + 1:
        tgl.varlist.append(copy(tgl.varlist[self.lastindentlevel]))
    elif indentlevel > self.lastindentlevel + 1:
        tgl.returnError("Syntax Error", "Cannot indent forward by more than one level at at time", False)

    self.lastindentlevel = indentlevel
    return inputline
def r_findurl(arglist):
    """Return the URLs from a list whose pages match a keyword expression.

    ``arglist`` has the shape ``['in', urllist, 'with', *expression]``.
    Duplicate URLs are visited once, in first-seen order.  A URL lacking a
    scheme or network location gets an ``http://`` prefix, and the prefixed
    form is what ends up in the result.

    The expression is built from ``arglist[1:]``:

    * list entries (this includes the URL list itself) are skipped;
    * entries found in ``LOGICALS`` are copied into the boolean expression;
    * entries found in ``IGNORE`` are dropped;
    * any other entry is used as a regular expression and replaced by
      ``True`` when it, or its title-cased form, matches some text node of
      the page, else ``False``.

    A URL is kept when the expression evaluates truthy, or when the
    expression is empty (no filter at all).  A failure on one URL is reported
    through ``tgl.returnError`` and the remaining URLs are still processed.

    Returns:
        list of matching URLs, or ``False`` on a syntax error.
    """
    # Both original syntax checks reported the same message, so they are
    # merged; the length guard turns a too-short argument list into that
    # syntax error instead of an IndexError.
    if len(arglist) < 3 or arglist[0] != 'in' or arglist[2] != 'with':
        tgl.returnError("Function Error", "FindUrl function illegal syntax. Correct syntax should be findUrl in urllist with [keyword expression]", False)
        return False

    result = []
    seen = set()
    for raw_url in arglist[1]:
        # De-duplicate while keeping the caller's order; the original used
        # set(), which made the order of the result arbitrary.
        if raw_url in seen:
            continue
        seen.add(raw_url)

        parts = urlparse.urlsplit(raw_url)
        if parts.scheme and parts.netloc:
            this_url = raw_url
        else:
            this_url = "http://" + raw_url

        try:
            soup = BeautifulSoup(urlopen(this_url))
            tokens = []
            for entry in arglist[1:]:
                # isinstance() replaces the "<type 'list'>" string comparison,
                # which only ever matched on Python 2.
                if isinstance(entry, list):
                    continue
                if entry in LOGICALS:
                    tokens.append(entry)
                elif entry in IGNORE:
                    continue
                elif (soup.find_all(text=re.compile(entry))
                        or soup.find_all(text=re.compile(entry.title()))):
                    tokens.append("True")
                else:
                    tokens.append("False")
            # An empty expression means "no filter"; evaluating "" would raise
            # SyntaxError and be misreported as a link failure below.  (The
            # original "len(arglist[1:]) == 0" branch could never run once
            # arglist[2] == 'with' had passed, so it is gone.)
            # NOTE(review): eval() only ever sees LOGICALS entries plus the
            # literals True/False, so it is safe only while LOGICALS contains
            # nothing but boolean operators/parentheses -- confirm.
            if not tokens or eval(" ".join(tokens)):
                result.append(this_url)
        except Exception:
            # Deliberately broad (the original used a bare except) so one bad
            # URL or pattern does not abort the scan, but
            # KeyboardInterrupt/SystemExit are no longer swallowed.
            tgl.returnError("Run Time Error", "Can\'t open the link: " + this_url + " in findurl", False)
    return result
def checkaslintegrity(self, inputline):
    """Check that a parsed line has the expected nested tag structure.

    ``inputline[0][0]`` must be ``"indentlevel"`` and ``inputline[1][0]`` must
    name one of the known statement kinds; for some kinds the first tags of
    the nested payload are checked as well.  Every violation is reported
    through ``tgl.returnError`` as a "Syntax Error".  Nothing is returned.
    """
    def report(message):
        tgl.returnError("Syntax Error", message, False)

    if inputline[0][0] != "indentlevel":
        report("Missing indentation information")
        return

    kind = inputline[1][0]

    if kind == "declaration":
        payload = inputline[1][1]
        known_types = ("number", "text", "url", "numlist", "textlist", "urllist")
        if not (payload[0] == "datatype" and payload[1] in known_types):
            report("Declaration syntax mismatch")
    elif kind == "assignment":
        if inputline[1][1][0] != "variable":
            report("Assignment syntax mismatch")
    elif kind == "expression":
        payload = inputline[1][1]
        if payload[0] != "functioncall":
            report("Expression function syntax mismatch")
        elif payload[1][0] != "functionname":
            report("Expression function name mismatch")
    elif kind == "conditional":
        if inputline[1][1][0] != "control":
            report("Conditional function syntax mismatch")
    elif kind in ("forstatement", "custom"):
        # These kinds are accepted without inspecting their payload.
        return
    else:
        report("Missing syntax header")
def t_error(t):
    """Report the first character of ``t.value`` as illegal, then skip it.

    The problem is reported through ``tgl.returnError`` as a "Lexing Error";
    afterwards ``t.lexer.skip(1)`` is called so scanning can continue.
    Presumably this is the lexer's error hook (PLY naming convention) --
    confirm against the lexer setup.
    """
    offending = t.value[0]
    message = "Encounter illegal character '%s'" % offending
    tgl.returnError("Lexing Error", message, False)
    t.lexer.skip(1)
def p_error(p):
    """Report a parser syntax error through ``tgl.returnError``.

    ``p`` is the offending token, or ``None`` when the error is detected at
    the end of the input (PLY-style parsers call the error hook with ``None``
    in that case -- presumably this is such a hook; confirm against the
    parser setup).
    """
    if p is None:
        # Without this guard "p.value" raises AttributeError and hides the
        # real syntax error.
        tgl.returnError("Syntax Error", "Syntax error at end of input", False)
        return
    # The one-element tuple keeps %-formatting correct when p.value is itself
    # a tuple.
    tgl.returnError("Syntax Error", "Syntax error at '%s'" % (p.value,), False)