def check_created_image_regex(first_image_url, second_image_url, regex):
    """Validate a candidate regex string against two cached page sources.

    The regex must, on BOTH pages, capture (via its named group "link") a
    relative/absolute link that resolves to the expected image URL for that
    page.  Uses module-level globals: first_page_source, second_page_source
    (objects exposing raw HTML via ._source), first_page_uri, second_page_uri
    (base System.Uri values), and the debug flag.

    Returns a (bool, int) pair: (True, match count on the first page) when
    the regex works on both pages, otherwise (False, 0).
    """
    matches = Regex.Matches(first_page_source._source, regex, RegexOptions.IgnoreCase)
    # Pre-compute percent-escaped variants so either raw or escaped forms of
    # the expected URLs are accepted in the comparisons below.
    escaped_first_image_url = escape_uri_string(first_image_url)
    escaped_second_image_url = escape_uri_string(second_image_url)
    if matches.Count == 0:
        # Regex does not match the first page at all.
        return False, 0
    else:
        if debug:
            print "\nFound " + str( matches.Count ) + " match(es) on the first page with regex: " + regex
            print "Captured: " + matches[0].Value
            print "link group: " + matches[0].Groups["link"].Value
        #We don't care if there is more than one result. As long as the first result is the correct image
        # NOTE(review): IronPython surfaces the .NET out-parameter overload of
        # Uri.TryCreate as a (bool success, Uri created) tuple -- confirm this
        # matches the IronPython version in use.
        result, image_uri = Uri.TryCreate(first_page_uri, matches[0].Groups["link"].Value)
        #Valid url and matches the input image url
        if result and image_uri.AbsoluteUri in (first_image_url, escaped_first_image_url):
            if debug:
                print "Valid uri and matches image url"
        else:
            if debug:
                print "Not a valid uri or doesn't match image url"
            return False, 0
        # First page passed; repeat the exact same check on the second page.
        matches_second = Regex.Matches(second_page_source._source, regex, RegexOptions.IgnoreCase)
        if matches_second.Count == 0:
            if debug:
                print "No matches on the second page"
            return False, 0
        #Regex match on the second page. Same deal as above. We don't care if there is more than one result
        if debug:
            print "\nFound " + str( matches_second.Count) + " match(es) on the second page"
            print "Captured: " + matches_second[0].Value
            print "link group: " + matches_second[0].Groups["link"].Value
        result, image_uri = Uri.TryCreate( second_page_uri, matches_second[0].Groups["link"].Value)
        if result and image_uri.AbsoluteUri in (second_image_url, escaped_second_image_url):
            if debug:
                print "Regex works on both pages and returns the correct image"
            # Report the first page's match count alongside success.
            return True, matches.Count
        else:
            if debug:
                print "Invalid Uri or doesn't match the second image url"
            return False, 0
    # NOTE(review): unreachable -- every path through the else-branch above
    # already returned; kept for byte-compatibility.
    return False, matches.Count
def browserEnum():
    """Collect a text summary of browser activity on the local Windows host.

    Gathers, best-effort: active Internet Explorer window URLs (via the
    Shell.Application COM object), the last URLs found in Chrome's History
    file and Chrome bookmark-bar entries, URLs scraped from Firefox
    places.sqlite, and IE TypedURLs from each user hive in the registry.
    Each section that fails (file missing, access denied, ...) is silently
    skipped by design.

    NOTE(review): this reads other users' registry hives and browser profile
    files -- it appears to be host-survey/post-exploitation tooling; handle
    accordingly.

    Returns the accumulated summary string.
    """
    summary = printHeader("BROWSER ENUM")
    # Loose URL matcher used to scrape links out of raw (binary) profile files.
    regex = Regex('(http|ftp|https|file)://([\w_-]+(?:(?:\.[\w_-]+)+))([\w.,@?^=%&:/~+#-]*[\w@?^=%&/~+#-])?')
    #Active IE Urls
    summary += printSubheader("ACTIVE EXPLORER URLS")
    # Late-bound COM: enumerate open Explorer/IE windows and their locations.
    app = Activator.CreateInstance(Type.GetTypeFromProgID("Shell.Application"))
    summary += "\n".join([w.LocationUrl() for w in app.Windows()])
    #Chrome History
    summary += printSubheader("\n\nChrome History")
    try:
        # NOTE(review): non-raw string relies on \U, \A, \G etc. not being
        # escape sequences in Python 2 byte strings -- fragile but functional.
        cHistPath = "{0}\Users\{1}\AppData\Local\Google\Chrome\User Data\Default\History".format(Env.GetEnvironmentVariable("systemdrive"), Env.UserName)
        cHist = open(cHistPath, "r").read()
        # Scrape the SQLite file as raw text; keep only the last 10 URL hits.
        summary += "\n".join(["[*] {0}\n".format(m.Value) for m in regex.Matches(cHist)][-10:])
    except:
        # Best-effort: Chrome absent/locked/denied -- skip the section.
        pass
    summary += printSubheader("\nChrome Bookmarks")
    #Chrome Bookmarks
    try:
        cBMPath = "{0}\Users\{1}\AppData\Local\Google\Chrome\User Data\Default\Bookmarks".format(Env.GetEnvironmentVariable("systemdrive"), Env.UserName)
        # The Bookmarks file is JSON; only bookmark-bar children are listed.
        js = JavaScriptSerializer()
        cBM = js.DeserializeObject(open(cBMPath, "r").read())
        urls = cBM["roots"]["bookmark_bar"]["children"]
        for url in urls:
            u = url['url']
            d = url['name']
            summary += "[*] {0}\n{1}\n\n".format(d, u)
    except:
        pass
    summary += printSubheader("Firefox History")
    #Firefox History
    try:
        # Re-created regex identical to the one above (could be reused).
        regex = Regex('(http|ftp|https|file)://([\w_-]+(?:(?:\.[\w_-]+)+))([\w.,@?^=%&:/~+#-]*[\w@?^=%&/~+#-])?')
        fHistPath = "{0}\Users\{1}\AppData\Roaming\Mozilla\Firefox\Profiles".format(Env.GetEnvironmentVariable("systemdrive"), Env.UserName)
        # Scan every *.default profile's places.sqlite as raw text;
        # NOTE(review): unlike Chrome this keeps the FIRST 10 hits, not the
        # last 10 -- confirm the asymmetry is intentional.
        for path in DirectoryInfo(fHistPath).EnumerateDirectories("*.default"):
            places = open(path.FullName + "\places.sqlite", "r").read()
            summary += "\n".join(["[*] {0}\n".format(m.Value) for m in regex.Matches(places)][:10])
    except:
        pass
    summary += printSubheader("IE History")
    typedUrlPath = "\Software\Microsoft\Internet Explorer\TypedURLs"
    # Walk every loaded user hive under HKEY_USERS, skipping the .DEFAULT
    # hive and the *_Classes hives, and dump each user's TypedURLs values.
    for sid in Registry.Users.GetSubKeyNames():
        if sid != ".DEFAULT" and not sid.endswith("Classes"):
            try:
                typedUrlsKey = Registry.Users.OpenSubKey(sid + typedUrlPath)
                if typedUrlsKey != None:
                    # Resolve the SID to a DOMAIN\user account name for the header.
                    summary += "[{0}][{1}]\n".format(sid, SecurityIdentifier(sid.ToString()).Translate(NTAccount))
                    for value in typedUrlsKey.GetValueNames():
                        summary += "\t{0}\n".format(typedUrlsKey.GetValue(value))
                    summary += "\n"
            except SystemError:
                # e.g. unresolvable SID or access denied -- skip this hive.
                pass
    return summary
def check_regex_against_source(regex, check_value, check_value2): matches = Regex.Matches(first_page_source._source, regex._regex, RegexOptions.IgnoreCase) if matches.Count != 0: if debug: print "\n\nFound " + str( matches.Count ) + " match(es) on the first page with regex: " + regex._regex print "Captured: " + matches[0].Value print "link group: " + matches[0].Groups["link"].Value #We don't care if there is more than one result. As long as the first result is the correct image result, result_uri = Uri.TryCreate(first_page_uri, matches[0].Groups["link"].Value) if result and result_uri.AbsoluteUri in (check_value, check_value2): #Valid url and matches against the check_value if debug: print "Valid uri" else: return False matches_second = Regex.Matches(second_page_source._source, regex._regex, RegexOptions.IgnoreCase) if matches_second.Count == 0: if debug: print "No matches found on the second page" return False #Regex match on the second page. Same deal as above. We don't care if there is more than one result if debug: print "\nFound " + str( matches_second.Count) + " match(es) on the second page" print "Captured: " + matches_second[0].Value print "link group: " + matches_second[0].Groups["link"].Value result, result_uri = Uri.TryCreate( second_page_uri, matches_second[0].Groups["link"].Value) if result: regex._matches = matches.Count if debug: print "Added to valid regex" return regex else: if debug: print "Invalid uri" return False return False
def FindScanners(worker, books):
    """Detect the scanner/release-group tag in each book's filename and
    append it to the book's ScanInformation field (ComicRack script).

    worker: background worker handle (unused in the visible body).
    books:  iterable of ComicRack book objects to process in place.
    """
    #Load the various settings. settings is a dict
    settings = LoadSettings()
    #Load the scanners
    unformatedscanners = LoadListFromFile(SCANNERSFILE)
    #Sort the scanners by length and reverse it. For example cl will come after clickwheel allowing them to be matched correctly.
    unformatedscanners.sort(key=len, reverse=True)
    #Format the scanners for use in the regex
    scanners = "|".join(unformatedscanners)
    # (?<Tags>...) is .NET named-group syntax (not Python's (?P<...>)).
    scanners = "(?<Tags>" + scanners + ")"
    #Load the blacklist and format it
    blacklist = LoadListFromFile(BLACKLISTFILE)
    blacklist.extend(LoadUserBlackListFromFile(USERBLACKLISTFILE))
    formatedblacklist = "|".join(blacklist)
    #Add in the blacklist
    #These amazing regex are designed by the amazing Helmic.
    # Matches a bracketed/underscored token that is NOT purely blacklist
    # words, capturing the candidate scanner name into the Tags group.
    pattern = r"(?:(?:__(?!.*__[^_]))|[(\[])(?!(?:" + formatedblacklist + r"|[\s_\-\|/,])+[)\]])(?<Tags>(?=[^()\[\]]*[^()\[\]\W\d_])[^()\[\]]{2,})[)\]]?"
    # Used later to strip residual blacklist words out of the matched tag.
    replacePattern = r"(?:[^\w]|_|^)(?:" + formatedblacklist + r")(?:[^\w]|_|$)"
    #Create the regex
    regex = Regex(pattern, RegexOptions.IgnoreCase)
    regexScanners = Regex(scanners, RegexOptions.IgnoreCase)
    regexReplace = Regex(replacePattern, RegexOptions.IgnoreCase)
    # Build the field map used to reject matches that are really metadata;
    # "Scan Information" itself is excluded so we never self-match.
    ComicBookFields = ComicRack.App.GetComicFields()
    ComicBookFields.Remove("Scan Information")
    ComicBookFields.Add("Language", "LanguageAsText")
    for book in books:
        #.net Regex
        #Note that every possible match is found and then the last one is used.
        #This is because in some rare cases more than one thing is mistakenly matched and the scanner is almost always the last match.
        matches = regex.Matches(book.FileName)
        unknowntag = ""
        try:
            match = matches[matches.Count - 1]
        except ValueError:
            #No match
            # NOTE(review): relies on IronPython raising ValueError when
            # indexing an empty MatchCollection with -1 -- confirm this is
            # not IndexError on the IronPython version in use.
            #print "Trying the Scanners.txt list"
            #Check the defined scanner names
            match = regexScanners.Match(book.FileName)
            #Still no match
            if match.Success == False:
                if settings["Unknown"] != "":
                    # Fall back to the configured "unknown" placeholder tag.
                    unknowntag = settings["Unknown"]
                else:
                    continue
        #Check if what was grabbed is a field in the comic
        fields = []
        for field in ComicBookFields.Values:
            fields.append(unicode(getattr(book, field)).lower())
        if match.Groups["Tags"].Value.lower() in fields:
            print "Uh oh. That matched tag is in the info somewhere."
            # The chosen match is actually book metadata; walk the earlier
            # matches (last one was already tried) from the end backwards.
            newmatch = False
            for n in reversed(range(0, matches.Count - 1)):
                if not matches[n].Groups["Tags"].Value.lower() in fields:
                    match = matches[n]
                    newmatch = True
                    break
            if newmatch == False:
                if settings["Unknown"] != "":
                    unknowntag = settings["Unknown"]
                else:
                    continue
        #Check if the match can be found in () in the series, title or altseries
        titlefields = [ book.ShadowSeries, book.ShadowTitle, book.AlternateSeries ]
        abort = False
        for title in titlefields:
            titleresult = re.search("\((?P<match>.*)\)", title)
            if titleresult != None and titleresult.group( "match").lower() == match.Groups["Tags"].Value.lower():
                #The match is part of the title, series or altseries so skip it
                print "The match is part of the title, series or altseries"
                abort = True
                break
        if abort == True:
            if settings["Unknown"] != "":
                unknowntag = settings["Unknown"]
            else:
                continue
        #Get a list of the old ScanInformation
        oldtags = book.ScanInformation
        ListOfTagsTemp = oldtags.split(",")
        # split(",") on an empty field yields [''] -- drop the empty entry.
        if '' in ListOfTagsTemp:
            ListOfTagsTemp.remove('')
        ListOfTags = []
        if ListOfTagsTemp != []:
            for indtag in ListOfTagsTemp:
                ListOfTags.append(indtag.strip())
        #Create our new tag
        if unknowntag != "":
            newtag = settings["Prefix"] + unknowntag
        else:
            # Trim stray underscores/commas/spaces, then strip any embedded
            # blacklist words from the captured tag before prefixing.
            newtag = settings["Prefix"] + regexReplace.Replace(
                match.Groups["Tags"].Value.strip("_, "), "")
        if newtag not in ListOfTags:
            ListOfTags.append(newtag)
        #Sort alphabeticaly to be neat
        ListOfTags.sort()
        #Add to ScanInformation field
        book.ScanInformation = ", ".join(ListOfTags)