def replace_date_values(string):
    """Swap date-like path segments of ``string`` for the pattern that matches them.

    Two kinds of segment enclosed by ``/`` on both sides are rewritten:
    runs of digits, and English month names (full or abbreviated, matched
    case-insensitively).  Each hit is replaced by the *text of the pattern
    itself*, so the returned string contains regex syntax where the concrete
    value used to be.

    Returns the rewritten string.
    """
    # Every pattern doubles as its own replacement text, so it is spelled
    # out once and handed to Regex.Replace in both positions.
    number_segment = r"(?<=/)\d+(?=/)"
    month_segment = r"(?<=/)(?:jan(?:uary)?|feb(?:ruary)?|mar(?:ch)?|apr(?:il)?|may|june?|july?|aug(?:ust)?|sep(?:tember)?|oct(?:ober)?|nov(?:ember)?|dec(?:ember)?)(?=/)"

    # Digits first, month names second - same order as before.
    for segment in (number_segment, month_segment):
        string = Regex.Replace(string, segment, segment,
                               RegexOptions.IgnoreCase)

    return string
Ejemplo n.º 2
0
    def regexReplace(self, myString, myVals):
        '''
        Run a regex replacement over myString and return the result.

        myVals[0] is the pattern and myVals[1] is the replacement text.
        The pattern is compiled with the IgnoreCase and Singleline options.

        todo: this *might* work!
        '''
        # Values once used while experimenting with this method:
        #   myString = 'der Hugo', pattern = r'\w*', replacement = r'$&$&'
        pattern = myVals[0]
        options = RegexOptions.IgnoreCase | RegexOptions.Singleline
        compiled = Regex(pattern, options)

        replacement = myVals[1]
        return compiled.Replace(myString, replacement)
Ejemplo n.º 3
0
                def onSave():
                    """Store the entered credentials inside this script file.

                    Opens the file named by ``__file__``, replaces the empty
                    assignments ``username = ""`` and ``password = ""`` with
                    the values of ``username`` and ``password`` taken from the
                    enclosing scope, and writes the result back over the same
                    file (UTF-8, no byte order mark).

                    Any exception is caught and its .NET message and stack
                    trace are written to ``Trace``; nothing is returned.
                    """

                    def asLiteral(value):
                        # String.Format keeps the conversion the code always
                        # used for the value (including how None is rendered).
                        text = String.Format("{0}", value)

                        # The text ends up between double quotes in Python
                        # source, so backslashes and quotes must be escaped or
                        # the rewritten script no longer parses.
                        text = text.replace("\\", "\\\\").replace("\"", "\\\"")

                        # Regex.Replace treats "$" in the replacement as the
                        # start of a substitution ($1, $&, ...); "$$" emits a
                        # literal dollar sign.
                        return text.replace("$", "$$")

                    try:
                        fs = None
                        sr = None
                        sw = None

                        try:
                            fs = FileStream(__file__, FileMode.Open,
                                            FileAccess.ReadWrite,
                                            FileShare.Read)
                            encoding = UTF8Encoding(False)
                            sr = StreamReader(fs, encoding, True)

                            # Fill in the user name first, then the password.
                            lines = Regex.Replace(
                                sr.ReadToEnd(), "username\\s*=\\s*\"\"",
                                String.Format("username = \"{0}\"",
                                              asLiteral(username)),
                                RegexOptions.CultureInvariant)
                            lines = Regex.Replace(
                                lines, "password\\s*=\\s*\"\"",
                                String.Format("password = \"{0}\"",
                                              asLiteral(password)),
                                RegexOptions.CultureInvariant)

                            # Truncate before writing so no tail of the old
                            # content survives the rewrite.
                            fs.SetLength(0)
                            sw = StreamWriter(fs, encoding)
                            sw.Write(lines)

                        finally:
                            # Writer first so its buffer is flushed before the
                            # reader and the stream are closed.
                            if sw is not None:
                                sw.Close()

                            if sr is not None:
                                sr.Close()

                            if fs is not None:
                                fs.Close()

                    except Exception as e:
                        Trace.WriteLine(e.clsException.Message)
                        Trace.WriteLine(e.clsException.StackTrace)
def FindScanners(worker, books):
    """Tag every book in ``books`` with the scanner name found in its file name.

    For each book the file name is searched for a scanner tag (the last
    regex match wins, with the names from the scanners file as fallback).
    A candidate is rejected when it equals one of the book's own field
    values or a parenthesised part of its series, title or alternate
    series.  When no usable candidate remains, the "Unknown" setting is used
    if it is non-empty, otherwise the book is skipped.  The resulting tag,
    prefixed with the "Prefix" setting, is merged into
    ``book.ScanInformation``, which is rewritten as a sorted, ", "-joined
    list.

    ``worker`` is accepted but not used by this function.
    """

    #settings is a dict holding the various options
    settings = LoadSettings()

    #Known scanner names, longest first. For example cl comes after
    #clickwheel, so the longer name gets the chance to match.
    scanner_names = LoadListFromFile(SCANNERSFILE)
    scanner_names.sort(key=len, reverse=True)
    scanner_pattern = "(?<Tags>" + "|".join(scanner_names) + ")"

    #Built-in blacklist followed by the user's own entries
    blacklist = LoadListFromFile(BLACKLISTFILE)
    blacklist.extend(LoadUserBlackListFromFile(USERBLACKLISTFILE))
    formatedblacklist = "|".join(blacklist)

    #These amazing regex are designed by the amazing Helmic.
    tag_pattern = r"(?:(?:__(?!.*__[^_]))|[(\[])(?!(?:" + formatedblacklist + r"|[\s_\-\|/,])+[)\]])(?<Tags>(?=[^()\[\]]*[^()\[\]\W\d_])[^()\[\]]{2,})[)\]]?"
    blacklist_pattern = r"(?:[^\w]|_|^)(?:" + formatedblacklist + r")(?:[^\w]|_|$)"

    tag_finder = Regex(tag_pattern, RegexOptions.IgnoreCase)
    scanner_finder = Regex(scanner_pattern, RegexOptions.IgnoreCase)
    blacklist_stripper = Regex(blacklist_pattern, RegexOptions.IgnoreCase)

    #The field table is adjusted before use: the scan information entry is
    #removed and a language entry is added.
    comic_fields = ComicRack.App.GetComicFields()
    comic_fields.Remove("Scan Information")
    comic_fields.Add("Language", "LanguageAsText")

    for book in books:

        #.net Regex
        #Every possible match is collected and the last one is used, because
        #in rare cases more than one thing is matched by mistake and the
        #scanner is almost always the last match.
        found = tag_finder.Matches(book.FileName)
        fallback_tag = ""

        try:
            match = found[found.Count - 1]

        except ValueError:
            #Nothing matched, so try the defined scanner names instead
            match = scanner_finder.Match(book.FileName)

            if not match.Success:
                #Still nothing: use the "Unknown" tag or skip the book
                if settings["Unknown"] == "":
                    continue
                fallback_tag = settings["Unknown"]

        #Lower-cased values of the book's fields, to detect a "tag" that is
        #really part of the book info
        field_values = [
            unicode(getattr(book, attribute)).lower()
            for attribute in comic_fields.Values
        ]

        if match.Groups["Tags"].Value.lower() in field_values:
            print("Uh oh. That matched tag is in the info somewhere.")

            #Walk the earlier matches from last to first and take the first
            #one that is not a field value
            replacement = None
            index = found.Count - 2
            while index >= 0 and replacement is None:
                candidate = found[index]
                if candidate.Groups["Tags"].Value.lower() not in field_values:
                    replacement = candidate
                index -= 1

            if replacement is not None:
                match = replacement
            elif settings["Unknown"] != "":
                fallback_tag = settings["Unknown"]
            else:
                continue

        #Check if the match can be found in () in the series, title or
        #altseries
        in_title = False
        for title in (book.ShadowSeries, book.ShadowTitle,
                      book.AlternateSeries):
            bracketed = re.search("\((?P<match>.*)\)", title)
            if bracketed is None:
                continue
            if bracketed.group("match").lower() == match.Groups["Tags"].Value.lower():
                #The match is part of the title, series or altseries
                print("The match is part of the title, series or altseries")
                in_title = True
                break

        if in_title:
            if settings["Unknown"] == "":
                continue
            fallback_tag = settings["Unknown"]

        #Split the old ScanInformation into trimmed tags (one empty entry,
        #if present, is dropped before trimming)
        old_entries = book.ScanInformation.split(",")
        if '' in old_entries:
            old_entries.remove('')
        tags = [entry.strip() for entry in old_entries]

        #Create our new tag
        prefix = settings["Prefix"]
        if fallback_tag != "":
            new_tag = prefix + fallback_tag
        else:
            raw_tag = match.Groups["Tags"].Value.strip("_, ")
            new_tag = prefix + blacklist_stripper.Replace(raw_tag, "")

        if new_tag not in tags:
            tags.append(new_tag)

        #Sort alphabetically to be neat
        tags.sort()

        #Write back to the ScanInformation field
        book.ScanInformation = ", ".join(tags)
def _build_image_base_regex(domain, relative):
    """Return the regex fragment for image links under ``domain`` + ``relative``."""
    domain = escape_regex_characters(domain)
    relative = escape_regex_characters(relative.lstrip("/"))

    #Date-like path segments are swapped for patterns
    relative = replace_date_values(relative)

    #The domain was escaped above, so the text to find is "www\."; the
    #prefix is made optional.
    domain = Regex.Replace(domain, "www\\\\.", "(?:www\\.)?",
                           RegexOptions.IgnoreCase)

    baseregex = "(?:" + domain + ")?\\.*/?" + relative

    if debug: print("baseregex is: " + baseregex)

    return baseregex


def _find_working_image_regex(baseregex, first_image_url, second_image_url):
    """Return (regex, matches) for the first candidate that passes the check, else (None, None)."""
    link = "(?([\"'])[\"'](?<link>" + baseregex + "[^\"']+)[\"']|(?<link>" + baseregex + "[^\\s<>]+))"

    #The candidate anchored on a src attribute is tried before the bare link.
    for regex in ("src\\s*=\\s*" + link, link):
        result, matches = check_created_image_regex(first_image_url,
                                                    second_image_url, regex)
        if result:
            return regex, matches

    return None, None


def create_image_regex(first_image_url, second_image_url):
    """Build an ImageRegex that matches both given image urls.

    The common part of the two urls (``get_string_intersect``) is made
    relative to the module-level ``first_page_uri``, turned into a regex
    fragment and wrapped into candidate regexes, each verified with
    ``check_created_image_regex``.  If none of them works and the common
    part lives on the same host as ``first_page_uri``, the attempt is
    repeated with the absolute path of the common part.

    Returns an ``ImageRegex`` whose ``_matches`` holds the matches reported
    by the successful check, or ``None`` when no candidate passed the check.
    """

    if debug:
        print("Starting to create an image regex with urls:\n" + first_image_url + "\n" + second_image_url)

    base = get_string_intersect(first_image_url, second_image_url)

    baseuri = Uri(base)

    relativeuri = first_page_uri.MakeRelativeUri(baseuri)

    if relativeuri.IsAbsoluteUri:
        domain = relativeuri.Scheme + "://" + relativeuri.Host
        relative = relativeuri.AbsolutePath

    else:
        domain = baseuri.Scheme + "://" + baseuri.Host
        relative = relativeuri.OriginalString

    regex, matches = _find_working_image_regex(
        _build_image_base_regex(domain, relative),
        first_image_url, second_image_url)

    #Sometimes the relative link creator makes a bad split, in that case try
    #one other way - but only if the split was actually made before:
    if regex is None and baseuri.Host == first_page_uri.Host:
        regex, matches = _find_working_image_regex(
            _build_image_base_regex(baseuri.Scheme + "://" + baseuri.Host,
                                    baseuri.AbsolutePath),
            first_image_url, second_image_url)

    #Every failure ends here. Previously a failure on a different host fell
    #through and returned a regex that had not passed the check.
    if regex is None:
        if debug: print("Unable create a working image regex")
        return None

    if debug: print("Found a working image regex: " + regex)
    imgregex = ImageRegex(regex)
    imgregex._matches = matches
    return imgregex