def __init__(self, config, url):
        """Configure the adapter for the story addressed by *url*.

        Stores the story id taken from the URL query, selects the site
        section, category and date format from the first path segment, then
        records the normalized story URL and the site abbreviation.
        """
        BaseSiteAdapter.__init__(self, config, url)

        # Windows-1252 is a superset of iso-8859-1.  Most sites that claim to
        # be iso-8859-1 (and some that claim to be utf8) are really
        # Windows-1252.
        self.decode = ["Windows-1252", "utf8"]
        # If left empty, site doesn't return any message at all.
        self.username = "******"
        self.password = ""
        self.is_adult = False

        # URL validation guarantees the query is only sid=1234.
        query_fields = self.parsedUrl.query.split("=")
        self.story.setMetadata("storyId", query_fields[1])

        # The first path segment decides which part of the site this is.
        top_dir = self.parsedUrl.path.split("/")[1]
        if top_dir != "wiktt":
            self.story.addToList("category", "Originals")
            self.section = "/efiction/"
            self.dateformat = "%b %d, %Y"
        else:
            self.story.addToList("category", "Harry Potter")
            self.section = "/wiktt/efiction/"
            self.dateformat = "%m/%d/%Y"

        # Normalized story URL.
        url_pieces = (
            "http://",
            self.getSiteDomain(),
            self.section,
            "viewstory.php?sid=",
            self.story.getMetadata("storyId"),
        )
        self._setURL("".join(url_pieces))

        # Each adapter needs to have a unique site abbreviation.
        self.story.setMetadata("siteabbrev", "msq")
    def __init__(self, config, url):
        """Initialise the adapter from *url*.

        Records the story id (second '='-separated field of the query), the
        site section (first path segment), the normalized story URL, the
        site abbreviation, the category and the date format.
        """
        BaseSiteAdapter.__init__(self, config, url)

        # Windows-1252 is a superset of iso-8859-1.  Most sites that claim to
        # be iso-8859-1 (and some that claim to be utf8) are really
        # Windows-1252.
        self.decode = ["Windows-1252", "utf8"]
        # If left empty, site doesn't return any message at all.
        self.username = "******"
        self.password = ""
        self.is_adult = False

        # URL validation guarantees the query is only sid=1234.
        query_fields = self.parsedUrl.query.split('=')
        self.story.setMetadata('storyId', query_fields[1])

        path_segments = self.parsedUrl.path.split('/')
        self.section = path_segments[1]

        # Normalized story URL.
        self._setURL(''.join([
            'http://',
            self.getSiteDomain(),
            '/',
            self.section,
            '/viewstory.php?sid=',
            self.story.getMetadata('storyId'),
        ]))

        # Each adapter needs to have a unique site abbreviation.
        self.story.setMetadata('siteabbrev', 'sghp')

        # The site does not label its own stories with a category, so the
        # adapter does.  extracategories can't be used because a story could
        # be either Atlantis or SG-1.
        if 'atlantis' not in self.section:
            category = "Stargate: SG-1"
        else:
            category = "Stargate: Atlantis"
        self.story.addToList("category", category)

        # strptime-style format of the dates on this site, see
        # http://docs.python.org/library/datetime.html#strftime-strptime-behavior
        self.dateformat = "%Y.%m.%d"
    def __init__(self, config, url):
        """Initialise the adapter and normalize the story URL.

        The story id is the third '/'-separated piece of the URL path with
        any chapter suffix ("-ch-NN" / "-chNN") removed.  The URL handed to
        ``_setURL`` has a leading "<subdomain>.i" rewritten to "<subdomain>"
        and any "?page=..." suffix stripped.

        Args:
            config: passed through to ``BaseSiteAdapter.__init__``.
            url: story (or chapter) URL supplied by the caller.
        """
        BaseSiteAdapter.__init__(self, config, url)

        # Windows-1252 is a superset of iso-8859-1.  Most sites that claim to
        # be iso-8859-1 (and some that claim to be utf8) are really
        # Windows-1252.
        self.decode = ["utf8", "Windows-1252"]

        self.story.setMetadata('siteabbrev', 'litero')

        # Normalize to first chapter.  Not sure if they ever have more than
        # 2 digits.
        storyId = self.parsedUrl.path.split('/')[2]
        # Replace later chapters with first chapter but don't remove numbers
        # from the URL that disambiguate stories with the same title.
        # Raw strings keep the regex escapes (\d, \., \?) from being read as
        # (invalid) string escapes.
        storyId = re.sub(r"-ch-?\d\d", "", storyId)
        self.story.setMetadata('storyId', storyId)

        # Accept the mobile ("<subdomain>.i...") form but use the plain
        # subdomain.  The replacement must be the raw backreference r"\1";
        # the non-raw "\1" is the control character chr(1), which would
        # replace the matched subdomain with garbage.
        # NOTE(review): the pattern is anchored with ^, so a url that starts
        # with a scheme such as "http://" never matches -- confirm whether
        # the anchor is intended.
        url = re.sub(
            r"^(www|german|spanish|french|dutch|italian|romanian|portuguese|other)\.i",
            r"\1", url)

        # Strip ?page=...
        url = re.sub(r"\?page=.*$", "", url)

        self._setURL(url)

        # The date format will vary from site to site.
        # http://docs.python.org/library/datetime.html#strftime-strptime-behavior
        self.dateformat = '%m/%d/%y'
Beispiel #4
0
    def __init__(self, config, url):
        """Initialise the adapter from a '/<section>/story/<id>/...' URL.

        Stores the story id (fourth '/'-separated path piece), the section
        (second piece), the normalized story URL, the site abbreviation and a
        section-dependent date format.
        """
        BaseSiteAdapter.__init__(self, config, url)

        # If left empty, site doesn't return any message at all.
        self.username = "******"
        self.password = ""
        self.is_adult = False

        # Story id comes from the path.
        story_id = self.parsedUrl.path.split('/')[3]
        self.story.setMetadata('storyId', story_id)

        # www.dokuga.com has two 'sections', shown in the URL as
        # 'fanfiction' and 'spark', that change how things should be handled:
        #   http://www.dokuga.com/fanfiction/story/7528/1
        #   http://www.dokuga.com/spark/story/7299/1
        self.section = self.parsedUrl.path.split('/')[1]

        # Normalized story URL.
        url_pieces = [
            'http://',
            self.getSiteDomain(),
            '/',
            self.parsedUrl.path.split('/')[1],
            '/story/',
            self.story.getMetadata('storyId'),
        ]
        self._setURL(''.join(url_pieces))

        # Each adapter needs to have a unique site abbreviation.
        self.story.setMetadata('siteabbrev', 'dkg')

        # Date format depends on the section, see
        # http://docs.python.org/library/datetime.html#strftime-strptime-behavior
        if 'fanfiction' not in self.section:
            self.dateformat = "%m-%d-%y"
        else:
            self.dateformat = "%d %b %Y"
 def __init__(self, config, url):
     """Initialise the adapter and normalize the story URL.

     The story id is taken from whichever of the named groups ``id``,
     ``id2`` or ``id3`` of the site URL pattern matched *url*.

     Args:
         config: passed through to ``BaseSiteAdapter.__init__``.
         url: story URL supplied by the caller.

     Raises:
         exceptions.InvalidStoryURL: if *url* does not match the pattern
             returned by ``getSiteURLPattern()``.
     """
     BaseSiteAdapter.__init__(self, config, url)
     self.story.setMetadata('siteabbrev','mm')
     # 1252 is a superset of iso-8859-1.  Most sites that claim to be
     # iso-8859-1 (and some that claim to be utf8) are really windows-1252.
     self.decode = ["Windows-1252",
                    "utf8"]

     # get storyId from url--url validation guarantees query correct
     m = re.match(self.getSiteURLPattern(),url)
     if m:
         # Use the first non-empty id group.  The id3 case previously
         # stored group 'id2', which is empty whenever that branch runs.
         for group_name in ('id', 'id2', 'id3'):
             if m.group(group_name):
                 self.story.setMetadata('storyId',m.group(group_name))
                 break

         # normalized story URL.
         self._setURL('http://' + self.getSiteDomain() + '/fanfic/view_st.php/'+self.story.getMetadata('storyId'))
     else:
         raise exceptions.InvalidStoryURL(url,
                                          self.getSiteDomain(),
                                          self.getSiteExampleURLs())
    def __init__(self, config, url):
        """Initialise the adapter and normalize the story URL.

        Uses the ``id`` and ``cat`` groups of the site URL pattern to build
        'https://<domain>/archive/<cat>/<id>.shtml'.

        Raises:
            exceptions.InvalidStoryURL: if *url* does not match the pattern
                returned by ``getSiteURLPattern()``.
        """
        BaseSiteAdapter.__init__(self, config, url)

        self.is_adult = False

        match = re.match(self.getSiteURLPattern(), url)
        if not match:
            raise exceptions.InvalidStoryURL(url,
                                             self.getSiteDomain(),
                                             self.getSiteExampleURLs())

        self.story.setMetadata('storyId', match.group('id'))

        # Normalized story URL.
        self._setURL(''.join([
            'https://',
            self.getSiteDomain(),
            '/archive/',
            match.group('cat'),
            '/',
            self.story.getMetadata('storyId'),
            '.shtml',
        ]))

        # Each adapter needs to have a unique abbreviation, which is set here.
        self.story.setMetadata('siteabbrev', 'fga')

        # strptime-style format of the dates on this site; the directives are
        # listed at
        # http://docs.python.org/library/datetime.html#strftime-strptime-behavior
        self.dateformat = "%m/%d/%y"

        # The site has the entire story on one page, so this attribute is
        # initialised here to hold it and spare a second download when the
        # chapter text is requested.
        self.html = ''
Beispiel #7
0
    def __init__(self, config, url):
        """Initialise the adapter for one 'zone' (subdomain) of the site.

        The zone is the first dot-separated label of the URL's netloc; it is
        used both in the normalized story URL and in the site abbreviation
        ('<zone>aff').
        """
        BaseSiteAdapter.__init__(self, config, url)

        # If left empty, site doesn't return any message at all.
        self.username = "******"
        self.password = ""
        self.is_adult = False

        # Story id: second '='-separated field of the query string.
        query_fields = self.parsedUrl.query.split('=')
        self.story.setMetadata('storyId', query_fields[1])

        # The 'zone' of each "site" is its leading host label.
        host_labels = self.parsedUrl.netloc.split('.')
        self.zone = host_labels[0]

        # Normalized story URL.  (Checking self.zone against a list was
        # removed; it was redundant with getAcceptDomains and
        # getSiteURLPattern.)
        normalized = 'http://{0}.{1}/story.php?no={2}'.format(
            self.zone,
            self.getBaseDomain(),
            self.story.getMetadata('storyId'),
        )
        self._setURL(normalized)

        # Each adapter needs to have a unique site abbreviation.
        self.story.setMetadata('siteabbrev', self.zone + 'aff')

        # strptime-style format of the dates on this site, see
        # http://docs.python.org/library/datetime.html#strftime-strptime-behavior
        self.dateformat = "%Y-%m-%d"
    def __init__(self, config, url):
        """Initialise the adapter from a 'viewstory.php?sid=<id>' URL.

        Stores the story id, the normalized story URL, the site abbreviation
        and the date format.
        """
        BaseSiteAdapter.__init__(self, config, url)

        # Windows-1252 is a superset of iso-8859-1.  Most sites that claim to
        # be iso-8859-1 (and some that claim to be utf8) are really
        # Windows-1252.
        self.decode = ["Windows-1252", "utf8"]
        # If left empty, site doesn't return any message at all.
        self.username = "******"
        self.password = ""
        self.is_adult = False

        # URL validation guarantees the query is only sid=1234.
        query_fields = self.parsedUrl.query.split('=')
        self.story.setMetadata('storyId', query_fields[1])

        # Normalized story URL.
        self._setURL(''.join((
            'http://',
            self.getSiteDomain(),
            '/viewstory.php?sid=',
            self.story.getMetadata('storyId'),
        )))

        # Each adapter needs to have a unique site abbreviation.
        self.story.setMetadata('siteabbrev', 'efp')

        # strptime-style format of the dates on this site, see
        # http://docs.python.org/library/datetime.html#strftime-strptime-behavior
        self.dateformat = "%d/%m/%y"
Beispiel #9
0
    def __init__(self, config, url):
        """Initialise the adapter from a '/<section>/story/<id>/...' URL.

        Stores the story id (fourth '/'-separated path piece), the section
        (second piece), the normalized story URL, the site abbreviation and a
        section-dependent date format.
        """
        BaseSiteAdapter.__init__(self, config, url)

        # If left empty, site doesn't return any message at all.
        self.username = "******"
        self.password = ""
        self.is_adult = False

        # Story id comes from the path.
        story_id = self.parsedUrl.path.split('/')[3]
        self.story.setMetadata('storyId', story_id)

        # www.dokuga.com has two 'sections', shown in the URL as
        # 'fanfiction' and 'spark', that change how things should be handled:
        #   http://www.dokuga.com/fanfiction/story/7528/1
        #   http://www.dokuga.com/spark/story/7299/1
        self.section = self.parsedUrl.path.split('/')[1]

        # Normalized story URL.
        url_pieces = (
            'http://',
            self.getSiteDomain(),
            '/',
            self.parsedUrl.path.split('/')[1],
            '/story/',
            self.story.getMetadata('storyId'),
        )
        self._setURL(''.join(url_pieces))

        # Each adapter needs to have a unique site abbreviation.
        self.story.setMetadata('siteabbrev', 'dkg')

        # Date format depends on the section, see
        # http://docs.python.org/library/datetime.html#strftime-strptime-behavior
        if 'fanfiction' not in self.section:
            self.dateformat = "%m-%d-%y"
        else:
            self.dateformat = "%d %b %Y"
Beispiel #10
0
    def __init__(self, config, url):
        """Initialise the adapter and normalize the story URL to '/s/<id>'.

        Both URL styles are expected to be covered by the site URL pattern:
            https://inkbunny.net/submissionview.php?id=1342100  (old style)
            https://inkbunny.net/s/1234567                      (new style)

        Raises:
            exceptions.InvalidStoryURL: if *url* does not match the pattern
                returned by ``getSiteURLPattern()``.
        """
        BaseSiteAdapter.__init__(self, config, url)

        # If left empty, site doesn't return any message at all.
        self.username = '******'
        self.password = ''
        self.is_adult = False

        match = re.match(self.getSiteURLPattern(), url)
        if not match:
            raise exceptions.InvalidStoryURL(url, self.getSiteDomain(),
                                             self.getSiteExampleURLs())

        self.story.setMetadata('storyId', match.group('id'))

        # Normalized story URL: drops the chapter if there was one, leaving
        # the chapter index URL.
        nurl = ''.join([
            "https://",
            self.getSiteDomain(),
            "/s/",
            self.story.getMetadata('storyId'),
        ])
        self._setURL(nurl)

        # Each adapter needs to have a unique site abbreviation.
        self.story.setMetadata('siteabbrev', 'ibnet')

        # strptime-style format of the dates on this site, see
        # http://docs.python.org/library/datetime.html#strftime-strptime-behavior
        self.dateformat = "%d %b %Y %H:%M"

        # This is a one-story-per-page site, so the soup attribute is
        # initialised here for later use when fetching the chapter text.
        self.soup = None
Beispiel #11
0
    def __init__(self, config, url):
        """Initialise the adapter from a '/fiction/<id>[/<anything>]' URL.

        The numeric id following '/fiction/' becomes the story id and the
        story URL is normalized to 'http://<domain>/fiction/<id>'.

        Args:
            config: passed through to ``BaseSiteAdapter.__init__``.
            url: story URL supplied by the caller.
        """
        BaseSiteAdapter.__init__(self, config, url)

        # Windows-1252 is a superset of iso-8859-1.  Most sites that claim to
        # be iso-8859-1 (and some that claim to be utf8) are really
        # Windows-1252.
        self.decode = ["utf8", "Windows-1252"]
        # If left empty, site doesn't return any message at all.
        self.username = "******"
        self.password = ""
        self.is_adult = False

        # Get storyId from the path.  The optional trailing part is a
        # non-capturing group "(?:/.*)?"; the previous "(:/.+)?" required a
        # literal ':' and so rejected any path that continued after the id
        # (e.g. with a title slug), making .groups() fail on None.  The
        # pattern is a raw string so \d is a regex escape, not an invalid
        # string escape.
        path_match = re.match(r'/fiction/(\d+)(?:/.*)?$', self.parsedUrl.path)
        self.story.setMetadata('storyId', path_match.group(1))

        # normalized story URL.
        self._setURL('http://' + self.getSiteDomain() + '/fiction/' +
                     self.story.getMetadata('storyId'))

        # Each adapter needs to have a unique site abbreviation.
        self.story.setMetadata('siteabbrev', 'rylrdl')

        # The date format will vary from site to site.
        # http://docs.python.org/library/datetime.html#strftime-strptime-behavior
        self.dateformat = '%d/%m/%Y %H:%M:%S %p'
Beispiel #12
0
    def __init__(self, config, url):
        """Initialise the adapter from a 'viewstory.php?sid=<id>' URL.

        Stores the story id, the normalized story URL (under '/fiction/'),
        the site abbreviation and the date format.
        """
        BaseSiteAdapter.__init__(self, config, url)

        # Windows-1252 is a superset of iso-8859-1.  Most sites that claim to
        # be iso-8859-1 (and some that claim to be utf8) are really
        # Windows-1252.
        self.decode = ["Windows-1252", "utf8"]
        self.is_adult = False

        # URL validation guarantees the query is only sid=1234.
        query_fields = self.parsedUrl.query.split('=')
        self.story.setMetadata('storyId', query_fields[1])

        # Normalized story URL.
        # XXX Most sites don't have the /fanfic part.  Replace all to remove
        # it usually.
        self._setURL(''.join([
            'http://',
            self.getSiteDomain(),
            '/fiction/viewstory.php?sid=',
            self.story.getMetadata('storyId'),
        ]))

        # Each adapter needs to have a unique site abbreviation.  XXX
        self.story.setMetadata('siteabbrev', 'btf')

        # strptime-style format of the dates on this site (XXX), see
        # http://docs.python.org/library/datetime.html#strftime-strptime-behavior
        self.dateformat = "%d %b %Y"
    def __init__(self, config, url):
        """Initialise the adapter and normalize the story URL.

        Uses the ``id`` and ``name`` groups of the site URL pattern to build
        'http://www.<domain>/blog/archive/<id>-<name>/'.

        Raises:
            exceptions.InvalidStoryURL: if *url* does not match the pattern
                returned by ``getSiteURLPattern()``.
        """
        BaseSiteAdapter.__init__(self, config, url)

        # Windows-1252 is a superset of iso-8859-1.  Most sites that claim to
        # be iso-8859-1 (and some that claim to be utf8) are really
        # Windows-1252.
        self.decode = ["Windows-1252", "utf8"]
        # If left empty, site doesn't return any message at all.
        self.username = "******"
        self.password = ""
        self.is_adult = False

        match = re.match(self.getSiteURLPattern(), url)
        if not match:
            raise exceptions.InvalidStoryURL(url, self.getSiteDomain(), self.getSiteExampleURLs())

        self.story.setMetadata("storyId", match.group("id"))

        # Normalized story URL.
        url_pieces = [
            "http://www.",
            self.getSiteDomain(),
            "/blog/archive/",
            self.story.getMetadata("storyId"),
            "-",
            match.group("name"),
            "/",
        ]
        self._setURL("".join(url_pieces))

        # Each adapter needs to have a unique site abbreviation.
        self.story.setMetadata("siteabbrev", "idn")

        # strptime-style format of the dates on this site, see
        # http://docs.python.org/library/datetime.html#strftime-strptime-behavior
        self.dateformat = "%d %B %Y"
    def __init__(self, config, url):
        """Initialise the adapter from a '/<section>/viewstory.php?sid=<id>' URL.

        Stores the story id, the section (first path segment), the normalized
        story URL, the site abbreviation and a section-dependent date format.
        """
        BaseSiteAdapter.__init__(self, config, url)

        # Windows-1252 is a superset of iso-8859-1.  Most sites that claim to
        # be iso-8859-1 (and some that claim to be utf8) are really
        # Windows-1252.
        self.decode = ["Windows-1252", "utf8"]
        # If left empty, site doesn't return any message at all.
        self.username = "******"
        self.password = ""
        self.is_adult = False

        # URL validation guarantees the query is only sid=1234.
        query_fields = self.parsedUrl.query.split('=')
        self.story.setMetadata('storyId', query_fields[1])

        # pommedesang.com has two 'sections', shown in the URL as 'efiction'
        # and 'sds', that change how things should be handled:
        #   http://pommedesang.com/efiction/viewstory.php?sid=1234
        #   http://pommedesang.com/sds/viewstory.php?sid=1234
        path_segments = self.parsedUrl.path.split('/')
        self.section = path_segments[1]

        # Normalized story URL.
        self._setURL(''.join((
            'http://',
            self.getSiteDomain(),
            '/',
            self.section,
            '/viewstory.php?sid=',
            self.story.getMetadata('storyId'),
        )))

        # Each adapter needs to have a unique site abbreviation.
        self.story.setMetadata('siteabbrev', 'pmds')

        # Date format depends on the section, see
        # http://docs.python.org/library/datetime.html#strftime-strptime-behavior
        if 'efiction' not in self.section:
            self.dateformat = "%m/%d/%y"
        else:
            self.dateformat = "%b %d, %Y"
    def __init__(self, config, url):
        """Initialise the adapter and normalize the story URL to '/s/<id>'.

        The story id is normally the third '/'-separated path piece up to
        the first ':'.  When that piece contains 'storyInfo', the id is
        taken from the second '='-separated field of the query instead.
        """
        BaseSiteAdapter.__init__(self, config, url)
        logger.debug("StoriesOnlineNetAdapter.__init__ - url='%s'" % url)

        # If left empty, site doesn't return any message at all.
        self.username = "******"
        self.password = ""
        self.is_adult = False

        # Story id from the path...
        path_piece = self.parsedUrl.path.split('/')[2]
        self.story.setMetadata('storyId', path_piece.split(':')[0])
        # ...or from the query when the path pointed at a storyInfo page.
        if 'storyInfo' in self.story.getMetadata('storyId'):
            query_fields = self.parsedUrl.query.split('=')
            self.story.setMetadata('storyId', query_fields[1])

        # Normalized story URL.
        self._setURL(''.join([
            'http://',
            self.getSiteDomain(),
            '/s/',
            self.story.getMetadata('storyId'),
        ]))

        # Each adapter needs to have a unique site abbreviation.
        self.story.setMetadata('siteabbrev', self.getSiteAbbrev())

        # strptime-style format of the dates on this site, see
        # http://docs.python.org/library/datetime.html#strftime-strptime-behavior
        self.dateformat = "%Y-%m-%d"
    def __init__(self, config, url):
        """Initialise the adapter, remembering the chapter number found in *url*.

        ``maxChapter`` is the integer captured by ``_REGEX_TRAILING_DIGIT``
        in *url*, or 1 when that pattern does not occur.  The story id is the
        ``storyId`` group of the site URL pattern, and the story URL is
        normalized to 'http://<domain>/<storyId>'.
        """
        BaseSiteAdapter.__init__(self, config, url)

        self.decode = ["utf8", "Windows-1252"]

        self.story.setMetadata('siteabbrev', 'bdsmgesch')

        # Pick up a possible chapter number from the URL.
        trailing = _REGEX_TRAILING_DIGIT.search(url)
        self.maxChapter = 1 if trailing is None else int(trailing.group(1))

        # Story id from the site URL pattern.
        url_match = re.match(self.getSiteURLPattern(), url)
        self.story.setMetadata('storyId', url_match.group('storyId'))

        # Normalized story URL.
        domain = self.getSiteDomain()
        self._setURL('http://%s/%s' %
                     (domain, self.story.getMetadata('storyId')))

        self.dateformat = '%d. %m %Y - %H:%M'
Beispiel #17
0
    def __init__(self, config, url):
        """Initialise the adapter and normalize the story URL.

        The story id is first taken from the third '/'-separated path piece
        and then replaced by the ``id`` group of the site URL pattern; the
        normalized URL is '<prefix>/<tp>/<id>/'.

        Raises:
            exceptions.InvalidStoryURL: if *url* does not match the pattern
                returned by ``getSiteURLPattern()``.
        """
        BaseSiteAdapter.__init__(self, config, url)

        # Windows-1252 is a superset of iso-8859-1.  Most sites that claim to
        # be iso-8859-1 (and some that claim to be utf8) are really
        # Windows-1252.
        self.decode = ["utf8", "Windows-1252"]

        # Provisional story id straight from the path.
        path_pieces = self.parsedUrl.path.split('/')
        self.story.setMetadata('storyId', path_pieces[2])

        # Definitive story id from the site URL pattern.
        match = re.match(self.getSiteURLPattern(), url)
        if not match:
            raise exceptions.InvalidStoryURL(url, self.getSiteDomain(),
                                             self.getSiteExampleURLs())

        self.story.setMetadata('storyId', match.group('id'))

        # Normalized story URL.
        self._setURL(''.join([
            self.getURLPrefix(),
            '/',
            match.group('tp'),
            '/',
            self.story.getMetadata('storyId'),
            '/',
        ]))

        # Each adapter needs to have a unique site abbreviation.
        self.story.setMetadata('siteabbrev', 'fsb')

        # strptime-style format of the dates on this site, see
        # http://docs.python.org/library/datetime.html#strftime-strptime-behavior
        self.dateformat = "%b %d, %Y at %I:%M %p"
    def __init__(self, config, url):
        """Initialise the adapter and normalize the story URL.

        The story id is first taken from the third '/'-separated path piece
        and then replaced by the ``id`` group of the site URL pattern; the
        normalized URL is '<prefix>/<tp>/<id>/'.

        Raises:
            exceptions.InvalidStoryURL: if *url* does not match the pattern
                returned by ``getSiteURLPattern()``.
        """
        BaseSiteAdapter.__init__(self, config, url)

        # Windows-1252 is a superset of iso-8859-1.  Most sites that claim to
        # be iso-8859-1 (and some that claim to be utf8) are really
        # Windows-1252.
        self.decode = ["utf8", "Windows-1252"]

        # Provisional story id straight from the path.
        path_pieces = self.parsedUrl.path.split('/')
        self.story.setMetadata('storyId', path_pieces[2])

        # Definitive story id from the site URL pattern.
        match = re.match(self.getSiteURLPattern(), url)
        if not match:
            raise exceptions.InvalidStoryURL(url,
                                             self.getSiteDomain(),
                                             self.getSiteExampleURLs())

        self.story.setMetadata('storyId', match.group('id'))

        # Normalized story URL.
        url_pieces = (
            self.getURLPrefix(),
            '/',
            match.group('tp'),
            '/',
            self.story.getMetadata('storyId'),
            '/',
        )
        self._setURL(''.join(url_pieces))

        # Each adapter needs to have a unique site abbreviation.
        self.story.setMetadata('siteabbrev', 'fsb')

        # strptime-style format of the dates on this site, see
        # http://docs.python.org/library/datetime.html#strftime-strptime-behavior
        self.dateformat = "%b %d, %Y at %I:%M %p"
Beispiel #19
0
    def __init__(self, config, url):
        """Initialise the adapter from a 'viewstory.php?sid=<id>' URL.

        When the URL's netloc contains "explicit", the normalized URL keeps
        the 'explicit.' host prefix and a different date format is used.
        """
        BaseSiteAdapter.__init__(self, config, url)

        # Windows-1252 is a superset of iso-8859-1.  Most sites that claim to
        # be iso-8859-1 (and some that claim to be utf8) are really
        # Windows-1252.
        self.decode = ["Windows-1252", "utf8"]
        # If left empty, site doesn't return any message at all.
        self.username = "******"
        self.password = ""
        self.is_adult = False

        # URL validation guarantees the query is only sid=1234.
        query_fields = self.parsedUrl.query.split('=')
        self.story.setMetadata('storyId', query_fields[1])

        # Normalized story URL and date format, per host variant.
        on_explicit_host = "explicit" in self.parsedUrl.netloc
        scheme_and_prefix = 'http://explicit.' if on_explicit_host else 'http://'
        self._setURL(''.join([
            scheme_and_prefix,
            self.getSiteDomain(),
            '/viewstory.php?sid=',
            self.story.getMetadata('storyId'),
        ]))
        self.dateformat = "%d/%b/%y" if on_explicit_host else "%d %b %Y"

        # Each adapter needs to have a unique site abbreviation.
        self.story.setMetadata('siteabbrev', 'pffa')
    def __init__(self, config, url):
        """Initialise the adapter and normalize the story URL.

        Uses the ``id``, ``category`` and ``author`` groups of the site URL
        pattern to build 'https://<domain>/<category>/<author>/<id>/'.

        Raises:
            exceptions.InvalidStoryURL: if *url* does not match the pattern
                returned by ``getSiteURLPattern()``.
        """
        BaseSiteAdapter.__init__(self, config, url)

        # Windows-1252 is a superset of iso-8859-1.  Most sites that claim to
        # be iso-8859-1 (and some that claim to be utf8) are really
        # Windows-1252.
        self.decode = ["Windows-1252", "utf8", "iso-8859-1"]
        self.is_adult = False

        match = re.match(self.getSiteURLPattern(), url)
        if not match:
            raise exceptions.InvalidStoryURL(url, self.getSiteDomain(),
                                             self.getSiteExampleURLs())

        self.story.setMetadata('storyId', match.group('id'))

        # Normalized story URL.
        url_pieces = [
            'https://',
            self.getSiteDomain(),
            '/',
            match.group('category'),
            '/',
            match.group('author'),
            '/',
            self.story.getMetadata('storyId'),
            '/',
        ]
        self._setURL(''.join(url_pieces))

        # Each adapter needs to have a unique site abbreviation.
        self.story.setMetadata('siteabbrev', 'trekffnet')

        # strptime-style format of the dates on this site, see
        # http://docs.python.org/library/datetime.html#strftime-strptime-behavior
        self.dateformat = "%m/%d/%y"
Beispiel #21
0
    def __init__(self, config, url):
        """Initialise the adapter from the second path segment of *url*.

        That segment is used as the story id, substituted into
        ``STORY_URL_TEMPLATE`` for the normalized URL, and ``SITE_DOMAIN`` is
        stored as the site abbreviation.
        """
        BaseSiteAdapter.__init__(self, config, url)

        path_segments = get_url_path_segments(url)
        story_id = path_segments[1]

        normalized_url = STORY_URL_TEMPLATE % story_id
        self._setURL(normalized_url)

        story = self.story
        story.setMetadata('storyId', story_id)
        story.setMetadata('siteabbrev', SITE_DOMAIN)
    def __init__(self, config, url):
        """Configure the adapter for the story addressed by *url*.

        Stores the story id taken from the URL query, selects the site
        section, category and date format from the first path segment, then
        records the normalized story URL and the site abbreviation.
        """
        BaseSiteAdapter.__init__(self, config, url)

        # If left empty, site doesn't return any message at all.
        self.username = "******"
        self.password = ""
        self.is_adult = False

        # URL validation guarantees the query is only sid=1234.
        query_fields = self.parsedUrl.query.split('=')
        self.story.setMetadata('storyId', query_fields[1])

        # The first path segment decides which part of the site this is.
        top_dir = self.parsedUrl.path.split('/')[1]
        if top_dir != 'wiktt':
            self.story.addToList("category", "Originals")
            self.section = '/efiction/'
            self.dateformat = "%b %d, %Y"
        else:
            self.story.addToList("category", "Harry Potter")
            self.section = '/wiktt/efiction/'
            self.dateformat = "%m/%d/%Y"

        # Normalized story URL.
        self._setURL(''.join((
            'http://',
            self.getSiteDomain(),
            self.section,
            'viewstory.php?sid=',
            self.story.getMetadata('storyId'),
        )))

        # Each adapter needs to have a unique site abbreviation.
        self.story.setMetadata('siteabbrev', 'msq')
    def __init__(self, config, url):
        """Initialise the adapter and normalize the story URL.

        The normalized URL is
        'http://<domain>/fanfictions/index.php?act=vie&id=<id>'; it is also
        written to the ``storyUrl`` metadata with ``condremoveentities=False``.

        Raises:
            exceptions.InvalidStoryURL: if *url* does not match the pattern
                returned by ``getSiteURLPattern()``.
        """
        BaseSiteAdapter.__init__(self, config, url)

        self.setHeader()

        # Windows-1252 is a superset of iso-8859-1.  Most sites that claim to
        # be iso-8859-1 (and some that claim to be utf8) are really
        # Windows-1252.
        self.decode = ["Windows-1252", "utf8"]
        # If left empty, site doesn't return any message at all.
        self.username = "******"
        self.password = ""
        self.is_adult = False

        match = re.match(self.getSiteURLPattern(), url)
        if not match:
            raise exceptions.InvalidStoryURL(url,
                                             self.getSiteDomain(),
                                             self.getSiteExampleURLs())

        self.story.setMetadata('storyId', match.group('id'))

        # Normalized story URL: gets rid of the chapter if there, leaving the
        # chapter 1 URL on this site.
        nurl = ''.join([
            "http://",
            self.getSiteDomain(),
            "/fanfictions/index.php?act=vie&id=",
            self.story.getMetadata('storyId'),
        ])
        self._setURL(nurl)
        # _setURL mangles the ampersands needed in metadata['storyUrl'], so
        # the value is stored again explicitly.
        self.story.setMetadata('storyUrl', nurl, condremoveentities=False)

        # Each adapter needs to have a unique site abbreviation.
        self.story.setMetadata('siteabbrev', 'bnfnet')
    def __init__(self, config, url):
        """Initialise the adapter and normalize the story URL.

        Uses the ``id`` and ``filestory`` groups of the site URL pattern to
        build 'http://<domain>/<filestory>.php?<filestory>=<id>'.

        Raises:
            exceptions.InvalidStoryURL: if *url* does not match the pattern
                returned by ``getSiteURLPattern()``.
        """
        BaseSiteAdapter.__init__(self, config, url)

        # Windows-1252 is a superset of iso-8859-1.  Most sites that claim to
        # be iso-8859-1 (and some that claim to be utf8) are really
        # Windows-1252.
        self.decode = ["Windows-1252", "utf8"]

        # If left empty, site doesn't return any message at all.
        self.username = "******"
        self.password = ""

        # Get story/file and storyId from the url.
        match = re.match(self.getSiteURLPattern(), url)
        if not match:
            raise exceptions.InvalidStoryURL(url,
                                             self.getSiteDomain(),
                                             self.getSiteExampleURLs())

        self.story.setMetadata('storyId', match.group('id'))

        # Normalized story URL.
        url_pieces = (
            'http://',
            self.getSiteDomain(),
            '/',
            match.group('filestory'),
            '.php?',
            match.group('filestory'),
            '=',
            self.story.getMetadata('storyId'),
        )
        self._setURL(''.join(url_pieces))

        self.story.setMetadata('siteabbrev', 'ressec')
        # strptime-style format of the dates on this site, e.g. 20 Nov 2005;
        # see
        # http://docs.python.org/library/datetime.html#strftime-strptime-behavior
        self.dateformat = "%d %b %Y"
    def __init__(self, config, url):
        """Initialise the adapter and normalize the story URL.

        When the site URL pattern's ``anchorpost`` group is non-empty, that
        post id is the story id and the URL becomes
        '<prefix>/posts/<anchorpost>/'.  Otherwise the ``id`` group is the
        story id and the URL becomes '<prefix>/<tp>/<title><id>/', with an
        empty title when the ``title`` group did not match.

        Raises:
            exceptions.InvalidStoryURL: if *url* does not match the pattern
                returned by ``getSiteURLPattern()``.
        """
        # State kept for reader processing; set before the base initialiser
        # runs.
        self.reader = False
        self.post_cache = {}
        self.threadmarks_for_reader = {}

        BaseSiteAdapter.__init__(self, config, url)

        # Provisional story id straight from the path.
        path_pieces = self.parsedUrl.path.split('/')
        self.story.setMetadata('storyId', path_pieces[2])

        # Definitive story id from the site URL pattern.
        match = re.match(self.getSiteURLPattern(), url)
        if not match:
            raise exceptions.InvalidStoryURL(url,
                                             self.getSiteDomain(),
                                             self.getSiteExampleURLs())

        if not match.group('anchorpost'):
            self.story.setMetadata('storyId', match.group('id'))
            # Normalized story URL.
            title = match.group('title') or ""
            self._setURL(''.join([
                self.getURLPrefix(),
                '/',
                match.group('tp'),
                '/',
                title,
                self.story.getMetadata('storyId'),
                '/',
            ]))
        else:
            self.story.setMetadata('storyId', match.group('anchorpost'))
            self._setURL(''.join([
                self.getURLPrefix(),
                '/posts/',
                match.group('anchorpost'),
                '/',
            ]))

        # Each adapter needs to have a unique site abbreviation.
        self.story.setMetadata('siteabbrev', 'fsb')

        # strptime-style format of the dates on this site, see
        # http://docs.python.org/library/datetime.html#strftime-strptime-behavior
        self.dateformat = "%b %d, %Y at %I:%M %p"
Beispiel #26
0
    def __init__(self, config, url):
        """Initialise the adapter from the second path segment of *url*.

        That segment is used as the story id, substituted into
        ``STORY_URL_TEMPLATE`` for the normalized URL, and ``SITE_DOMAIN`` is
        stored as the site abbreviation.
        """
        BaseSiteAdapter.__init__(self, config, url)

        path_segments = get_url_path_segments(url)
        story_id = path_segments[1]

        normalized_url = STORY_URL_TEMPLATE % story_id
        self._setURL(normalized_url)

        story = self.story
        story.setMetadata('storyId', story_id)
        story.setMetadata('siteabbrev', SITE_DOMAIN)
    def __init__(self, config, url):
        """Initialise the adapter for one 'zone' (subdomain) of the site.

        The zone is the URL's netloc with '.fanficauthors.net' removed; the
        story id is the first path segment.  The story URL is normalized to
        'http://<zone>.<base domain>/<storyId>/'.
        """
        BaseSiteAdapter.__init__(self, config, url)
        logger.debug(
            "FanficAuthorsNetAdapter.__init__ - url='{0}'".format(url))

        # Windows-1252 is a superset of iso-8859-1.  Most sites that claim to
        # be iso-8859-1 (and some that claim to be utf8) are really
        # Windows-1252.
        self.decode = ["utf8", "Windows-1252", "iso-8859-1"]

        # If left empty, site doesn't return any message at all.
        self.username = "******"
        self.password = ""
        self.is_adult = False

        # Story id from the path.
        path_segments = self.parsedUrl.path.split('/')
        self.story.setMetadata('storyId', path_segments[1])

        # The 'zone' of each "site".
        self.zone = self.parsedUrl.netloc.replace('.fanficauthors.net', '')

        # Normalized story URL.
        normalized = 'http://{0}.{1}/{2}/'.format(
            self.zone,
            self.getBaseDomain(),
            self.story.getMetadata('storyId'),
        )
        self._setURL(normalized)

        # Each adapter needs to have a unique site abbreviation.
        self.story.setMetadata('siteabbrev', 'ffa')

        # strptime-style format of the dates on this site, see
        # http://docs.python.org/library/datetime.html#strftime-strptime-behavior
        self.dateformat = "%d %b %y"
 def __init__(self, config, url):
     """Initialise the adapter and normalize the story URL.

     Raises:
         exceptions.InvalidStoryURL: if ``url`` does not match
             ``getSiteURLPattern()``.
     """
     BaseSiteAdapter.__init__(self, config, url)

     # 1252 is a superset of iso-8859-1.  Most sites that claim to be
     # iso-8859-1 (and some that claim to be utf8) are really windows-1252.
     self.decode = ["Windows-1252", "utf8"]
     # If left empty, the site doesn't return any message at all.
     self.username = "******"
     self.password = ""
     self.is_adult = False

     # The story id is taken from the validated URL.
     match = re.match(self.getSiteURLPattern(), url)
     if not match:
         raise exceptions.InvalidStoryURL(url,
                                          self.getSiteDomain(),
                                          self.getSiteExampleURLs())

     self.story.setMetadata('storyId', match.group('id'))

     # Normalized story URL: drops any chapter part, leaving the chapter
     # index URL.
     index_url = "http://" + self.getSiteDomain() + "/historias/"
     index_url = index_url + self.story.getMetadata('storyId')
     self._setURL(index_url)

     # Each adapter needs to have a unique site abbreviation.
     self.story.setMetadata('siteabbrev', 'potficscom')
    def __init__(self, config, url):
        """Initialise the adapter for a zoned ``story.php?no=<id>`` URL.

        The story id is the text after the first '=' of the query string and
        the zone is the first dot-separated label of the host name.  The
        site abbreviation is the zone followed by ``aff``.
        """
        BaseSiteAdapter.__init__(self, config, url)

        # If left empty, the site doesn't return any message at all.
        self.username = "******"
        self.password = ""
        self.is_adult = False

        # Story id from the query string.
        query_parts = self.parsedUrl.query.split('=')
        self.story.setMetadata('storyId', query_parts[1])

        # The 'zone' is the leading host label.
        host_labels = self.parsedUrl.netloc.split('.')
        self.zone = host_labels[0]

        # Normalized story URL.  (Checking self.zone against a list was
        # removed--it was redundant with getAcceptDomains and
        # getSiteURLPattern both.)
        normalized = 'http://{0}.{1}/story.php?no={2}'.format(
            self.zone,
            self.getBaseDomain(),
            self.story.getMetadata('storyId'),
        )
        self._setURL(normalized)

        # Each adapter needs to have a unique site abbreviation.
        abbrev = self.zone + 'aff'
        self.story.setMetadata('siteabbrev', abbrev)

        # Date format used by this site; see
        # http://docs.python.org/library/datetime.html#strftime-strptime-behavior
        self.dateformat = "%Y-%m-%d"
Beispiel #30
0
    def __init__(self, config, url):
        """Initialise the adapter and normalize the story URL.

        Raises:
            exceptions.InvalidStoryURL: if ``url`` does not match
                ``getSiteURLPattern()``.
        """
        BaseSiteAdapter.__init__(self, config, url)

        # 1252 is a superset of iso-8859-1.  Most sites that claim to be
        # iso-8859-1 (and some that claim to be utf8) are really
        # windows-1252.
        self.decode = ["Windows-1252", "utf8"]
        # If left empty, the site doesn't return any message at all.
        self.username = "******"
        self.password = ""
        self.is_adult = False

        # Reject anything the site URL pattern does not recognise.
        matched = re.match(self.getSiteURLPattern(), url)
        if not matched:
            raise exceptions.InvalidStoryURL(url, self.getSiteDomain(),
                                             self.getSiteExampleURLs())

        self.story.setMetadata('storyId', matched.group('id'))

        # Normalized story URL: gets rid of the chapter if present, leaving
        # the chapter index URL.
        prefix = "http://" + self.getSiteDomain() + "/historias/"
        self._setURL(prefix + self.story.getMetadata('storyId'))

        # Each adapter needs to have a unique site abbreviation.
        self.story.setMetadata('siteabbrev', 'potficscom')
Beispiel #31
0
    def __init__(self, config, url):
        """Initialise the adapter and normalize the story URL.

        The story id is taken from the ``id`` group of
        ``getSiteURLPattern()`` and the URL is rewritten to
        ``https://<site domain>/works/<id>``.

        Args:
            config: Passed through to ``BaseSiteAdapter.__init__``.
            url: Story URL as supplied by the caller.

        Raises:
            exceptions.InvalidStoryURL: if ``url`` does not match
                ``getSiteURLPattern()``.
        """
        BaseSiteAdapter.__init__(self, config, url)

        # If left empty, the site doesn't return any message at all.
        self.username = "******"
        self.password = ""
        self.is_adult = False

        self.full_work_soup = None
        self.use_full_work_soup = True

        # The story id comes only from the validated pattern match.  An
        # earlier unconditional ``path.split('/')[2]`` lookup was dropped:
        # its value was always overwritten below, and on short paths it
        # raised IndexError before InvalidStoryURL could be reported.
        m = re.match(self.getSiteURLPattern(), url)
        if m:
            self.story.setMetadata('storyId', m.group('id'))

            # normalized story URL.
            self._setURL('https://' + self.getSiteDomain() + '/works/' +
                         self.story.getMetadata('storyId'))
        else:
            raise exceptions.InvalidStoryURL(url, self.getSiteDomain(),
                                             self.getSiteExampleURLs())

        # Each adapter needs to have a unique site abbreviation.
        self.story.setMetadata('siteabbrev', 'ao3')

        # The date format will vary from site to site.
        # http://docs.python.org/library/datetime.html#strftime-strptime-behavior
        self.dateformat = "%Y-%b-%d"
    def __init__(self, config, url):
        """Initialise the adapter and normalize a Literotica story URL.

        The story id is the element at index 2 of the '/'-split URL path
        with any ``-chNN`` / ``-ch-NN`` chapter suffix removed, so later
        chapters share the id of the first chapter.

        Args:
            config: Passed through to ``BaseSiteAdapter.__init__``.
            url: Story URL; a trailing ``?page=...`` is stripped before the
                URL is stored with ``_setURL``.
        """
        BaseSiteAdapter.__init__(self, config, url)
        logger.debug("LiteroticaComAdapter:__init__ - url='%s'" % url)

        # 1252 is a superset of iso-8859-1.  Most sites that claim to be
        # iso-8859-1 (and some that claim to be utf8) are really
        # windows-1252.
        self.decode = ["utf8", "Windows-1252"]

        # Each adapter needs to have a unique site abbreviation.
        self.story.setMetadata('siteabbrev', 'litero')

        # normalize to first chapter.  Not sure if they ever have more than 2 digits.
        storyId = self.parsedUrl.path.split('/')[2]
        # replace later chapters with first chapter but don't remove numbers
        # from the URL that disambiguate stories with the same title.
        storyId = re.sub(r"-ch-?\d\d", "", storyId)
        self.story.setMetadata('storyId', storyId)

        ## accept m(mobile)url, but use www.
        # The replacement must be a raw string: a plain "\1" is the control
        # character \x01, not a reference to the captured sub-domain.
        # NOTE(review): the pattern is anchored at the start of the whole
        # URL, so it cannot match when ``url`` begins with a scheme such as
        # "http://" -- confirm whether that is intended.
        url = re.sub(r"^(www|german|spanish|french|dutch|italian|romanian|portuguese|other)\.i",
                     r"\1",
                     url)

        ## strip ?page=...
        url = re.sub(r"\?page=.*$", "", url)

        ## set url
        self._setURL(url)

        # The date format will vary from site to site.
        # http://docs.python.org/library/datetime.html#strftime-strptime-behavior
        self.dateformat = "%m/%d/%y"
Beispiel #33
0
    def __init__(self, config, url):
        """Initialise the adapter and normalize a ``/fiction/<id>`` URL.

        The numeric story id is read from the URL path and the URL is
        rewritten to ``http://<site domain>/fiction/<id>``.

        Args:
            config: Passed through to ``BaseSiteAdapter.__init__``.
            url: Story URL as supplied by the caller.
        """
        BaseSiteAdapter.__init__(self, config, url)

        # 1252 is a superset of iso-8859-1.  Most sites that claim to be
        # iso-8859-1 (and some that claim to be utf8) are really
        # windows-1252.
        self.decode = ["utf8",
                       "Windows-1252"
                       ]
        self.username = "******" # if left empty, site doesn't return any message at all.
        self.password = ""
        self.is_adult=False

        # get storyId from url--url validation guarantees the path starts
        # with fiction/1234.  The optional tail is a non-capturing group
        # '(?:/.+)?'; the previous '(:/.+)?' required a literal ':' and so
        # paths such as /fiction/1234/some-title did not match, making
        # .groups() fail on None.
        path_match = re.match(r'/fiction/(\d+)(?:/.+)?$', self.parsedUrl.path)
        self.story.setMetadata('storyId', path_match.groups()[0])


        # normalized story URL.
        self._setURL('http://' + self.getSiteDomain() + '/fiction/'+self.story.getMetadata('storyId'))

        # Each adapter needs to have a unique site abbreviation.
        self.story.setMetadata('siteabbrev','rylrdl')

        # The date format will vary from site to site.
        # http://docs.python.org/library/datetime.html#strftime-strptime-behavior
        self.dateformat = '%d/%m/%Y %H:%M:%S %p'
    def __init__(self, config, url):
        """Initialise the adapter and normalize the story URL to ``/s/<id>``.

        Accepted URL shapes, per the notes kept from the original author:
            https://inkbunny.net/submissionview.php?id=1342100  (old style)
            https://inkbunny.net/s/1234567                      (new style)

        Raises:
            exceptions.InvalidStoryURL: if ``url`` does not match
                ``getSiteURLPattern()``.
        """
        BaseSiteAdapter.__init__(self, config, url)

        # If left empty, the site doesn't return any message at all.
        self.username = '******'
        self.password = ''
        self.is_adult = False

        # The story id comes from the validated URL.
        found = re.match(self.getSiteURLPattern(), url)
        if not found:
            raise exceptions.InvalidStoryURL(url,
                                             self.getSiteDomain(),
                                             self.getSiteExampleURLs())

        self.story.setMetadata('storyId', found.group('id'))
        # Normalized story URL: gets rid of the chapter if present, leaving
        # the chapter index URL.
        base = "https://" + self.getSiteDomain() + "/s/"
        self._setURL(base + self.story.getMetadata('storyId'))

        # Each adapter needs to have a unique site abbreviation.
        self.story.setMetadata('siteabbrev', 'ibnet')

        # Date format used by this site; see
        # http://docs.python.org/library/datetime.html#strftime-strptime-behavior
        self.dateformat = "%d %b %Y %H:%M"

        # This is a 1 story/page site, so the soup variable is initialised
        # here for the getChapterText function.
        self.soup = None
    def __init__(self, config, url):
        """Initialise the adapter and normalize the story URL.

        The story id is taken from the ``id`` group of
        ``getSiteURLPattern()`` and the URL is rewritten to
        ``https://<site domain>/story/view/<id>``.

        Args:
            config: Passed through to ``BaseSiteAdapter.__init__``.
            url: Story URL as supplied by the caller.

        Raises:
            exceptions.InvalidStoryURL: if ``url`` does not match
                ``getSiteURLPattern()``.
        """
        BaseSiteAdapter.__init__(self, config, url)

        self.username = ""
        self.password = ""
        self.is_adult=False

        # The story id comes only from the validated pattern match.  An
        # earlier unconditional ``path.split('/')[3]`` lookup was dropped:
        # its value was always overwritten below, and on short paths it
        # raised IndexError before InvalidStoryURL could be reported.
        m = re.match(self.getSiteURLPattern(),url)
        if m:
            self.story.setMetadata('storyId',m.group('id'))

            # normalized story URL.
            self._setURL('https://' + self.getSiteDomain() + '/story/view/'+self.story.getMetadata('storyId'))
        else:
            raise exceptions.InvalidStoryURL(url,
                                             self.getSiteDomain(),
                                             self.getSiteExampleURLs())

        # Each adapter needs to have a unique site abbreviation.
        self.story.setMetadata('siteabbrev','asnff')

        # The date format will vary from site to site.
        # http://docs.python.org/library/datetime.html#strftime-strptime-behavior
        self.dateformat = "%Y-%b-%d"
    def __init__(self, config, url):
        """Initialise the adapter for a ``/<section>/viewstory.php?sid=<id>`` URL.

        The section (element at index 1 of the '/'-split path) is kept on
        the instance and also decides which Stargate category is recorded.
        """
        BaseSiteAdapter.__init__(self, config, url)

        # 1252 is a superset of iso-8859-1.  Most sites that claim to be
        # iso-8859-1 (and some that claim to be utf8) are really
        # windows-1252.
        self.decode = ["Windows-1252", "utf8"]
        # If left empty, the site doesn't return any message at all.
        self.username = "******"
        self.password = ""
        self.is_adult = False

        # Story id from the query string--url validation guarantees the
        # query is only sid=1234.
        query_parts = self.parsedUrl.query.split('=')
        self.story.setMetadata('storyId', query_parts[1])

        path_parts = self.parsedUrl.path.split('/')
        self.section = path_parts[1]

        # Normalized story URL.
        base = 'http://' + self.getSiteDomain() + '/' + self.section
        self._setURL(base + '/viewstory.php?sid=' + self.story.getMetadata('storyId'))

        # Each adapter needs to have a unique site abbreviation.
        self.story.setMetadata('siteabbrev', 'sghp')

        # If all stories from the site fall into the same category, the
        # site itself isn't likely to label them as such, so we do.  Can't
        # use extracategories, could be Atlantis or SG-1.
        category = "Stargate: Atlantis" if 'atlantis' in self.section else "Stargate: SG-1"
        self.story.addToList("category", category)

        # Date format used by this site; see
        # http://docs.python.org/library/datetime.html#strftime-strptime-behavior
        self.dateformat = "%Y.%m.%d"
Beispiel #37
0
    def __init__(self, config, url):
        """Initialise the adapter and normalize a Literotica story URL.

        The story id is the element at index 2 of the '/'-split URL path
        with any ``-chNN`` / ``-ch-NN`` chapter suffix removed, so later
        chapters share the id of the first chapter.

        Args:
            config: Passed through to ``BaseSiteAdapter.__init__``.
            url: Story URL; a trailing ``?page=...`` is stripped before the
                URL is stored with ``_setURL``.
        """
        BaseSiteAdapter.__init__(self, config, url)
        logger.debug("LiteroticaComAdapter:__init__ - url='%s'" % url)

        # Each adapter needs to have a unique site abbreviation.
        self.story.setMetadata('siteabbrev', 'litero')

        # normalize to first chapter.  Not sure if they ever have more than 2 digits.
        storyId = self.parsedUrl.path.split('/')[2]
        # replace later chapters with first chapter but don't remove numbers
        # from the URL that disambiguate stories with the same title.
        storyId = re.sub(r"-ch-?\d\d", "", storyId)
        self.story.setMetadata('storyId', storyId)

        ## accept m(mobile)url, but use www.
        # The replacement must be a raw string: a plain "\1" is the control
        # character \x01, not a reference to the captured sub-domain.
        # NOTE(review): the pattern is anchored at the start of the whole
        # URL, so it cannot match when ``url`` begins with a scheme such as
        # "http://" -- confirm whether that is intended.
        url = re.sub(r"^(www|german|spanish|french|dutch|italian|romanian|portuguese|other)\.i",
                     r"\1",
                     url)

        ## strip ?page=...
        url = re.sub(r"\?page=.*$", "", url)

        ## set url
        self._setURL(url)

        # The date format will vary from site to site.
        # http://docs.python.org/library/datetime.html#strftime-strptime-behavior
        self.dateformat = "%m/%d/%y"
    def __init__(self, config, url):
        """Initialise the adapter for a ``/fanfics/viewstory.php?sid=<id>`` URL.

        The story id is the text after the first '=' of the query string.
        """
        BaseSiteAdapter.__init__(self, config, url)

        # 1252 is a superset of iso-8859-1.  Most sites that claim to be
        # iso-8859-1 (and some that claim to be utf8) are really
        # windows-1252.
        self.decode = ["Windows-1252", "utf8"]
        # If left empty, the site doesn't return any message at all.
        self.username = "******"
        self.password = ""
        self.is_adult = False

        # Story id from the query string--url validation guarantees the
        # query is only sid=1234.
        query_parts = self.parsedUrl.query.split("=")
        self.story.setMetadata("storyId", query_parts[1])

        # Normalized story URL.
        # XXX Most sites don't have the /fanfic part.  Replace all to remove it usually.
        story_page = "http://" + self.getSiteDomain() + "/fanfics/viewstory.php?sid="
        self._setURL(story_page + self.story.getMetadata("storyId"))

        # Each adapter needs to have a unique site abbreviation.
        self.story.setMetadata("siteabbrev", "sjn")  # XXX

        # Date format used by this site; see
        # http://docs.python.org/library/datetime.html#strftime-strptime-behavior
        self.dateformat = "%b %d, %Y"  # XXX
    def __init__(self, config, url):
        """Initialise the adapter for a ``/<section>/viewstory.php?sid=<id>`` URL.

        pommedesang.com has two 'sections', shown in the URL as 'efiction'
        and 'sds', that change how things should be handled:
            http://pommedesang.com/efiction/viewstory.php?sid=1234
            http://pommedesang.com/sds/viewstory.php?sid=1234
        """
        BaseSiteAdapter.__init__(self, config, url)

        # 1252 is a superset of iso-8859-1.  Most sites that claim to be
        # iso-8859-1 (and some that claim to be utf8) are really
        # windows-1252.
        self.decode = ["Windows-1252", "utf8"]
        # If left empty, the site doesn't return any message at all.
        self.username = "******"
        self.password = ""
        self.is_adult = False

        # Story id from the query string--url validation guarantees the
        # query is only sid=1234.
        query_parts = self.parsedUrl.query.split('=')
        self.story.setMetadata('storyId', query_parts[1])

        # The section is the element at index 1 of the '/'-split path.
        path_parts = self.parsedUrl.path.split('/')
        self.section = path_parts[1]

        # Normalized story URL.
        base = 'http://' + self.getSiteDomain() + '/' + self.section
        self._setURL(base + '/viewstory.php?sid=' + self.story.getMetadata('storyId'))

        # Each adapter needs to have a unique site abbreviation.
        self.story.setMetadata('siteabbrev', 'pmds')

        # The date format depends on the section; see
        # http://docs.python.org/library/datetime.html#strftime-strptime-behavior
        self.dateformat = "%b %d, %Y" if 'efiction' in self.section else "%m/%d/%y"
Beispiel #40
0
 def __init__(self, config, url):
     """Initialise the adapter and normalize the story URL.

     The normalized URL has the form
     ``http://<site domain>/<filestory>.php?<filestory>=<id>`` where
     ``filestory`` and ``id`` are groups of ``getSiteURLPattern()``.

     Raises:
         exceptions.InvalidStoryURL: if ``url`` does not match
             ``getSiteURLPattern()``.
     """
     BaseSiteAdapter.__init__(self, config, url)

     # 1252 is a superset of iso-8859-1.  Most sites that claim to be
     # iso-8859-1 (and some that claim to be utf8) are really windows-1252.
     self.decode = ["Windows-1252", "utf8"]

     # If left empty, the site doesn't return any message at all.
     self.username = "******"
     self.password = ""

     # story/file and story id both come from the validated URL.
     match = re.match(self.getSiteURLPattern(), url)
     if not match:
         raise exceptions.InvalidStoryURL(url,
                                          self.getSiteDomain(),
                                          self.getSiteExampleURLs())

     self.story.setMetadata('storyId', match.group('id'))

     # Normalized story URL.
     domain = self.getSiteDomain()
     kind = match.group('filestory')
     self._setURL('http://' + domain + '/' + kind + '.php?' + kind + '=' + self.story.getMetadata('storyId'))

     self.story.setMetadata('siteabbrev', 'ressec')
     # Date format used by this site, e.g. 20 Nov 2005; see
     # http://docs.python.org/library/datetime.html#strftime-strptime-behavior
     self.dateformat = "%d %b %Y"
    def __init__(self, config, url):
        """Initialise the adapter for a ``viewstory.php?sid=<id>`` URL.

        When the host name contains "explicit", the normalized URL uses the
        ``explicit.`` sub-domain and a different date format is selected.
        """
        BaseSiteAdapter.__init__(self, config, url)

        # 1252 is a superset of iso-8859-1.  Most sites that claim to be
        # iso-8859-1 (and some that claim to be utf8) are really
        # windows-1252.
        self.decode = ["Windows-1252", "utf8"]
        # If left empty, the site doesn't return any message at all.
        self.username = "******"
        self.password = ""
        self.is_adult = False

        # Story id from the query string--url validation guarantees the
        # query is only sid=1234.
        query_parts = self.parsedUrl.query.split('=')
        self.story.setMetadata('storyId', query_parts[1])

        # Pick the host prefix and date format for the sub-site in use.
        if "explicit" in self.parsedUrl.netloc:
            host_prefix, date_fmt = 'http://explicit.', "%d/%b/%y"
        else:
            host_prefix, date_fmt = 'http://', "%d %b %Y"

        # Normalized story URL.
        self._setURL(host_prefix + self.getSiteDomain() + '/viewstory.php?sid=' + self.story.getMetadata('storyId'))
        self.dateformat = date_fmt

        # Each adapter needs to have a unique site abbreviation.
        self.story.setMetadata('siteabbrev', 'pffa')
Beispiel #42
0
    def __init__(self, config, url):
        """Initialise the adapter and normalize a thread or post URL.

        When ``getSiteURLPattern()`` captures an ``anchorpost`` group, that
        post id becomes the story id and the URL is normalized to
        ``<prefix>/posts/<anchorpost>/``.  Otherwise the ``id`` group is the
        story id and the URL is normalized to ``<prefix>/<tp>/<id>/``.

        Args:
            config: Passed through to ``BaseSiteAdapter.__init__``.
            url: Story URL as supplied by the caller.

        Raises:
            exceptions.InvalidStoryURL: if ``url`` does not match
                ``getSiteURLPattern()``.
        """
        # save for reader processing.
        self.reader = False
        self.post_cache = {}

        #logger.info("init url: "+url)
        BaseSiteAdapter.__init__(self, config, url)

        # The story id comes only from the validated pattern match.  An
        # earlier unconditional ``path.split('/')[2]`` lookup was dropped:
        # its value was always overwritten below, and on short paths it
        # raised IndexError before InvalidStoryURL could be reported.
        m = re.match(self.getSiteURLPattern(),url)
        if m:
            #logger.debug("groupdict:%s"%m.groupdict())
            if m.group('anchorpost'):
                self.story.setMetadata('storyId',m.group('anchorpost'))
                self._setURL(self.getURLPrefix() + '/posts/'+m.group('anchorpost')+'/')
            else:
                self.story.setMetadata('storyId',m.group('id'))
                # normalized story URL.
                self._setURL(self.getURLPrefix() + '/'+m.group('tp')+'/'+self.story.getMetadata('storyId')+'/')
        else:
            raise exceptions.InvalidStoryURL(url,
                                             self.getSiteDomain(),
                                             self.getSiteExampleURLs())

        # Each adapter needs to have a unique site abbreviation.
        self.story.setMetadata('siteabbrev','fsb')

        # The date format will vary from site to site.
        # http://docs.python.org/library/datetime.html#strftime-strptime-behavior
        self.dateformat = "%b %d, %Y at %I:%M %p"
    def __init__(self, config, url):
        """Initialise the adapter from a folder/author/story URL.

        Expected URL shape:
        http://www.area52hkh.net/[Folder]/[AuthorID]/[STORYID].php

        The folder, author id, story id and file extension are stored on
        the instance because they are used later, and the URL is normalized
        to ``http://<site domain>/<folder>/<authorId>/<storyId>.<extension>``.

        Args:
            config: Passed through to ``BaseSiteAdapter.__init__``.
            url: Story URL as supplied by the caller.
        """
        BaseSiteAdapter.__init__(self, config, url)

        self.username = "******"  # if left empty, site doesn't return any message at all.
        self.password = ""
        self.is_adult = False

        # Split the path once; elements 1..3 are folder, author and file.
        path_parts = self.parsedUrl.path.split('/')
        self.folder = path_parts[1]
        self.authorId = path_parts[2]
        # '.html' must be removed before '.htm'; in the opposite order
        # 'story.html' is reduced to 'storyl' and the story id (and the
        # normalized URL built from it) is wrong.
        self.storyId = path_parts[3].replace('.php', '').replace(
            '.html', '').replace('.htm', '')
        self.extension = self.parsedUrl.path.split('.')[1]

        self.story.setMetadata('storyId', self.storyId)
        self.story.setMetadata('authorId', self.authorId)

        # normalized story URL.
        self._setURL('http://{0}/{1}/{2}/{3}.{4}'.format(
            self.getSiteDomain(), self.folder,
            self.story.getMetadata('authorId'),
            self.story.getMetadata('storyId'), self.extension))

        # Each adapter needs to have a unique site abbreviation.
        self.story.setMetadata('siteabbrev', 'a52hkh')

        # The date format will vary from site to site.
        # http://docs.python.org/library/datetime.html#strftime-strptime-behavior
        self.dateformat = "%b %d, %Y"
    def __init__(self, config, url):
        """Initialise the adapter and normalize the story URL.

        The normalized URL has the form
        ``http://<site domain>/archive/<cat>/<id>.html`` where ``cat`` and
        ``id`` are groups of ``getSiteURLPattern()``.

        Raises:
            exceptions.InvalidStoryURL: if ``url`` does not match
                ``getSiteURLPattern()``.
        """
        BaseSiteAdapter.__init__(self, config, url)

        self.is_adult = False

        # Reject anything the site URL pattern does not recognise.
        found = re.match(self.getSiteURLPattern(), url)
        if not found:
            raise exceptions.InvalidStoryURL(url, self.getSiteDomain(),
                                             self.getSiteExampleURLs())

        self.story.setMetadata('storyId', found.group('id'))

        # Normalized story URL.
        archive_root = 'http://' + self.getSiteDomain() + '/archive/'
        category_dir = archive_root + found.group('cat') + '/'
        self._setURL(category_dir + self.story.getMetadata('storyId') + '.html')

        # Each adapter needs to have a unique abbreviation, which is set
        # here.
        self.story.setMetadata('siteabbrev', 'bfa')

        # Date format used by this site.  The page below lists the
        # directives that can be used to formulate the correct format:
        # http://docs.python.org/library/datetime.html#strftime-strptime-behavior
        self.dateformat = "%m/%d/%y"

        # This site has the entire story on one page, so a variable is
        # initialised here to hold it so that the getChapterText function
        # doesn't have to use bandwidth to get it again.
        self.html = ''
    def __init__(self, config, url):
        """Initialise the adapter from a folder/author/story URL.

        Expected URL shape:
        http://www.area52hkh.net/[Folder]/[AuthorID]/[STORYID].php

        The folder, author id, story id and file extension are stored on
        the instance because they are used later, and the URL is normalized
        to ``http://<site domain>/<folder>/<authorId>/<storyId>.<extension>``.

        Args:
            config: Passed through to ``BaseSiteAdapter.__init__``.
            url: Story URL as supplied by the caller.
        """
        BaseSiteAdapter.__init__(self, config, url)

        self.username = "******" # if left empty, site doesn't return any message at all.
        self.password = ""
        self.is_adult=False

        # Split the path once; elements 1..3 are folder, author and file.
        path_parts = self.parsedUrl.path.split('/')
        self.folder = path_parts[1]
        self.authorId = path_parts[2]
        # '.html' must be removed before '.htm'; in the opposite order
        # 'story.html' is reduced to 'storyl' and the story id (and the
        # normalized URL built from it) is wrong.
        self.storyId = path_parts[3].replace('.php','').replace('.html','').replace('.htm','')
        self.extension = self.parsedUrl.path.split('.')[1]

        self.story.setMetadata('storyId', self.storyId)
        self.story.setMetadata('authorId',self.authorId)

        # normalized story URL.
        self._setURL('http://{0}/{1}/{2}/{3}.{4}'.format(self.getSiteDomain(),
                                                         self.folder,
                                                         self.story.getMetadata('authorId'),
                                                         self.story.getMetadata('storyId'),
                                                         self.extension))


        # Each adapter needs to have a unique site abbreviation.
        self.story.setMetadata('siteabbrev','a52hkh')

        # The date format will vary from site to site.
        # http://docs.python.org/library/datetime.html#strftime-strptime-behavior
        self.dateformat = "%b %d, %Y"
    def __init__(self, config, url):
        """Initialise the adapter for a ``viewstory.php?sid=<id>`` URL.

        URLs whose first path element is ``wiktt`` are filed under
        "Harry Potter" in the ``/wiktt/efiction/`` section; everything else
        is filed under "Originals" in ``/efiction/``.  Each section has its
        own date format.
        """
        BaseSiteAdapter.__init__(self, config, url)

        # If left empty, the site doesn't return any message at all.
        self.username = "******"
        self.password = ""
        self.is_adult = False

        # Story id from the query string--url validation guarantees the
        # query is only sid=1234.
        query_parts = self.parsedUrl.query.split('=')
        self.story.setMetadata('storyId', query_parts[1])

        # Choose category, section and date format from the first path
        # element.
        if self.parsedUrl.path.split('/')[1] == 'wiktt':
            category, section, date_fmt = "Harry Potter", '/wiktt/efiction/', "%m/%d/%Y"
        else:
            category, section, date_fmt = "Originals", '/efiction/', "%b %d, %Y"
        self.story.addToList("category", category)
        self.section = section
        self.dateformat = date_fmt

        # Normalized story URL.
        section_root = 'http://' + self.getSiteDomain() + self.section
        self._setURL(section_root + 'viewstory.php?sid=' + self.story.getMetadata('storyId'))

        # Each adapter needs to have a unique site abbreviation.
        self.story.setMetadata('siteabbrev', 'msq')
Beispiel #47
0
    def __init__(self, config, url):
        """Initialise the adapter for a ``viewstory.php?sid=<id>`` URL.

        The story id is the text after the first '=' of the query string.
        """
        BaseSiteAdapter.__init__(self, config, url)

        # 1252 is a superset of iso-8859-1.  Most sites that claim to be
        # iso-8859-1 (and some that claim to be utf8) are really
        # windows-1252.
        self.decode = ["Windows-1252", "utf8"]
        # If left empty, the site doesn't return any message at all.
        self.username = "******"
        self.password = ""
        self.is_adult = False

        # Story id from the query string--url validation guarantees the
        # query is only sid=1234.
        query_parts = self.parsedUrl.query.split('=')
        self.story.setMetadata('storyId', query_parts[1])

        # Normalized story URL.
        story_page = 'http://' + self.getSiteDomain() + '/viewstory.php?sid='
        self._setURL(story_page + self.story.getMetadata('storyId'))

        # Each adapter needs to have a unique site abbreviation.
        self.story.setMetadata('siteabbrev', 'scacf')

        # Date format used by this site; see
        # http://docs.python.org/library/datetime.html#strftime-strptime-behavior
        self.dateformat = "%m/%d/%Y"
    def __init__(self, config, url):
        """Initialise the adapter, then hand ``url`` to ``_normalizeURL``."""
        BaseSiteAdapter.__init__(self, config, url)

        # Site abbreviation and date format are set before normalization.
        abbrev = 'tolkien'
        self.story.setMetadata('siteabbrev', abbrev)
        self.dateformat = '%B %d, %Y'

        self._normalizeURL(url)
Beispiel #49
0
 def __init__(self, config, url):
     """Initialise the test adapter and read the story id from the query."""
     BaseSiteAdapter.__init__(self, config, url)
     self.story.setMetadata('siteabbrev', 'tst1')
     self.crazystring = u" crazy tests:[bare amp(&) quote(') amp(&) gt(>) lt(<) ATnT(AT&T) pound(£)]"

     # Story id from the query string--url validation guarantees the
     # query is only sid=1234.
     query_parts = self.parsedUrl.query.split('=')
     self.story.setMetadata('storyId', query_parts[1])

     self.username = ''
     self.is_adult = False
    def __init__(self, config, url):
        """Initialise the adapter, then hand ``url`` to ``_normalizeURL``."""
        BaseSiteAdapter.__init__(self, config, url)

        # Site abbreviation and date format are set before normalization.
        abbrev = 'tolkien'
        self.story.setMetadata('siteabbrev', abbrev)
        self.dateformat = '%B %d, %Y'

        self._normalizeURL(url)
Beispiel #51
0
 def __init__(self, config, url):
     """Initialise the test adapter and read the story id from the query."""
     BaseSiteAdapter.__init__(self, config, url)
     self.story.setMetadata('siteabbrev', 'tst1')
     self.crazystring = u"tests:[bare amp(&) qt(') amp(&) gt(>) lt(<) ATnT(AT&T) L(£) Onna(女)]"

     # Story id from the query string--url validation guarantees the
     # query is only sid=1234.
     query_parts = self.parsedUrl.query.split('=')
     self.story.setMetadata('storyId', query_parts[1])

     self.username = ''
     self.is_adult = False
    def __init__(self, config, url):
        """Initialise the adapter; the story id follows 'story' in the path.

        The URL path is split on '/' and the element right after the first
        ``'story'`` element is used as the story id.
        """
        BaseSiteAdapter.__init__(self, config, url)

        path_parts = self.parsedUrl.path.split('/')
        marker_pos = path_parts.index('story')
        sid = path_parts[marker_pos + 1]

        self.story.setMetadata('storyId', sid)
        normalized = self.STORY_URL_TEMPLATE % sid
        self._setURL(normalized)
        self.story.setMetadata('siteabbrev', self.SITE_ABBREVIATION)
    def __init__(self, config, url):
        """Initialise the adapter; the story id follows 'story' in the path.

        The URL path is split on '/' and the element right after the first
        ``'story'`` element is used as the story id.
        """
        BaseSiteAdapter.__init__(self, config, url)

        path_parts = self.parsedUrl.path.split('/')
        marker_pos = path_parts.index('story')
        sid = path_parts[marker_pos + 1]

        self.story.setMetadata('storyId', sid)
        normalized = self.STORY_URL_TEMPLATE % sid
        self._setURL(normalized)
        self.story.setMetadata('siteabbrev', self.SITE_ABBREVIATION)
    def __init__(self, config, url):
        """Initialise the adapter from the ``sid`` query parameter.

        The first ``sid`` value of the parsed query string is stored as the
        story id; its integer form fills ``VIEW_STORY_URL_TEMPLATE``.
        """
        BaseSiteAdapter.__init__(self, config, url)

        params = urlparse.parse_qs(self.parsedUrl.query)
        sid = params['sid'][0]

        self.story.setMetadata('storyId', sid)
        normalized = self.VIEW_STORY_URL_TEMPLATE % int(sid)
        self._setURL(normalized)
        self.story.setMetadata('siteabbrev', self.SITE_ABBREVIATION)
Beispiel #55
0
    def __init__(self, config, url):
        """Initialise the adapter from the ``no`` query parameter.

        The first ``no`` value of the parsed query string is stored as the
        story id and fills ``READ_URL_TEMPLATE`` for the normalized URL.
        """
        BaseSiteAdapter.__init__(self, config, url)

        params = urlparse.parse_qs(self.parsedUrl.query)
        number = params['no'][0]

        self.story.setMetadata('storyId', number)
        normalized = self.READ_URL_TEMPLATE % number
        self._setURL(normalized)
        self.story.setMetadata('siteabbrev', self.SITE_ABBREVIATION)
    def __init__(self, config, url):
        """Initialise the adapter from the ``no`` query parameter.

        The first ``no`` value of the parsed query string is stored as the
        story id and fills ``READ_URL_TEMPLATE`` for the normalized URL.
        """
        BaseSiteAdapter.__init__(self, config, url)

        params = urlparse.parse_qs(self.parsedUrl.query)
        number = params['no'][0]

        self.story.setMetadata('storyId', number)
        normalized = self.READ_URL_TEMPLATE % number
        self._setURL(normalized)
        self.story.setMetadata('siteabbrev', self.SITE_ABBREVIATION)
Beispiel #57
0
 def __init__(self, config, url):
     """Initialise the adapter and read the story id from the URL path."""
     BaseSiteAdapter.__init__(self, config, url)
     self.story.setMetadata('siteabbrev', 'fw')

     # Story id is the element at index 2 of the '/'-split path--url
     # validation guarantees the second part is the storyId.
     path_parts = self.parsedUrl.path.split('/')
     self.story.setMetadata('storyId', path_parts[2])

     self.username = "******"
     self.password = ""
Beispiel #58
0
    def __init__(self, config, url):
        """Initialise the adapter from the ``sid`` query parameter.

        The first ``sid`` value of the parsed query string is stored as the
        story id; its integer form fills ``VIEW_STORY_URL_TEMPLATE``.
        """
        BaseSiteAdapter.__init__(self, config, url)

        params = urlparse.parse_qs(self.parsedUrl.query)
        sid = params['sid'][0]

        self.story.setMetadata('storyId', sid)
        normalized = self.VIEW_STORY_URL_TEMPLATE % int(sid)
        self._setURL(normalized)
        self.story.setMetadata('siteabbrev', self.SITE_ABBREVIATION)
    def __init__(self, config, url):
        """Initialise the adapter and normalize the URL to ``/story/<id>/``.

        The story id is the element at index 2 of the '/'-split URL path.
        """
        BaseSiteAdapter.__init__(self, config, url)
        # Each adapter needs to have a unique site abbreviation.
        self.story.setMetadata('siteabbrev','fimficnet')
        # Story id is read straight from the URL path.
        self.story.setMetadata('storyId', self.parsedUrl.path.split('/',)[2])
        # Normalized story URL, always with a trailing slash.
        self._setURL("http://"+self.getSiteDomain()+"/story/"+self.story.getMetadata('storyId')+"/")
        self.is_adult = False

        # The date format will vary from site to site.
        # http://docs.python.org/library/datetime.html#strftime-strptime-behavior
        self.dateformat = "%d %b %Y"