Example #1
def add_article():
    url = request.vars.url
    board = cacher.get('board', long(request.vars.board))
    article = logic.get_article_by_url(url)

    if article is None:
        r = Readability()
        json = r.content(url)
        article = db.article.insert(
            url=json['url'],
            readability_url=json['short_url'],
            title=json['title'],
            #content=json['content'],
            domain=json['domain'],
            author=json['author'],
            excerpt=json['excerpt'],
            word_count=json['word_count'],
            total_pages=json['total_pages'],
            date_published=json['date_published'],
            next_page_id=json['next_page_id'],
            rendered_pages=json['rendered_pages'],            
        )
    
    pin = logic.add_pin(article, board)
    if request.vars.linkedin and request.env.http_host != '127.0.0.1:8080':
        logic.share_on_linkedin(session.linkedin, pin)

    return 'Success'
Example #2
def get_metrics(commit, framework, sample, sample_path, udb_path):
    metrics = get_understand_metrics(framework, sample, udb_path, sample_path)
    metrics = get_necessary_metrics(metrics)
    metrics = adding_commit_data(commit, metrics)
    r = Readability(sample)
    readability = r.getReadability()
    del r
    metrics.append(readability)
    return metrics
Example #3
def run_FKGL(output_dir):
    with open(output_dir) as f:
        output = f.readlines()
        output = [d.lower().strip() for d in output]

    output_final = " ".join(output)
    rd = Readability(output_final)
    score = rd.FleschKincaidGradeLevel()
    return score
Example #4
def get_article(id):
    article = cacher.get('article', id)
    if 'content' not in article:
        #fetch content from readability in real time
        r = Readability()
        json = r.content(article['url'])
        article['content'] = json['content'].encode('UTF8', 'replace')
        cacher.set(article)

    return article
Example #5
def main(argv):
    params = InputParamsHandler(argv)

    dom_doc = Network().get_dom_doc(params.get_url())
    reader = Readability(dom_doc)

    article = ""
    article += (reader.get_title() + "\n\n")
    article += reader.get_article()

    reader.save_content(params.get_dir_name(), params.get_article_name(), article)
Example #6
    def __init__(self, text):
        self.readability = Readability(text)
        self.FLESCH_KINCAID = ['score', 'grade_level']
        self.FLESCH_EASE = ['score', 'ease', 'grade_level']
        self.DALE_CHALL = ['score', 'grade_level']
        self.ARI = ['score', 'grade_level', 'ages']
        self.CLI = ['score', 'grade_level']
        self.GUNNING_FOG = ['score', 'grade_level']
        self.SMOG = ['score', 'grade_level']
        self.SPACHE = ['score', 'grade_level']
        self.LINSEAR_WRITE = ['score', 'grade_level']
        self.values_index = self.initialize_value_index_array()
Example #7
def show_stat(text):
    rd = Readability(text)
    print('Test text:')
    print('"%s"\n' % text)
    print('ARI: ', rd.ARI())
    print('FleschReadingEase: ', rd.FleschReadingEase())
    print('FleschKincaidGradeLevel: ', rd.FleschKincaidGradeLevel())
    print('GunningFogIndex: ', rd.GunningFogIndex())
    print('SMOGIndex: ', rd.SMOGIndex())
    print('ColemanLiauIndex: ', rd.ColemanLiauIndex())
    print('LIX: ', rd.LIX())
    print('RIX: ', rd.RIX())
Example #8
def get_fk_grade_level(text):
    # The text must contain at least 100 words
    if len(text.split()) < 100:
        result = "ERROR: This piece of text is too short to get a Flesch Kincaid grade level."
    else:
        # Instantiate a Readability object
        r = Readability(text)
        # Get the F-K score metric
        fk = r.flesch_kincaid()
        # Get the F-K grade level
        result = fk.grade_level
    return result
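
A minimal usage sketch for the helper above, assuming it targets the py-readability-metrics package, whose Readability raises a ReadabilityException for texts under 100 words (hence the manual length check). The sample string is illustrative:

# Hypothetical usage of get_fk_grade_level; assumes py-readability-metrics
# (from readability import Readability) and its NLTK 'punkt' data are installed.
sample = "The quick brown fox jumps over the lazy dog. " * 30  # well over 100 words
print(get_fk_grade_level(sample))        # prints the Flesch-Kincaid grade level
print(get_fk_grade_level("Too short."))  # prints the ERROR message instead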
Example #9
    def test_smog(self):
        text = """
        In linguistics, the Gunning fog index is a readability test for English writing. The index estimates the years of formal education a person needs to understand the text on the first reading. For instance, a fog index of 12 requires the reading level of a United States high school senior (around 18 years old). The test was developed in 1952 by Robert Gunning, an American businessman who had been involved in newspaper and textbook publishing.
        The fog index is commonly used to confirm that text can be read easily by the intended audience. Texts for a wide audience generally need a fog index less than 12. Texts requiring near-universal understanding generally need an index less than 8.
        """
        text = ' '.join(text for i in range(0, 5))

        readability = Readability(text)
        r = readability.smog()

        print(r)
        self.assertEqual(12.516099999999998, r.score)
        self.assertEqual('13', r.grade_level)
Example #10
def get_read_stats(text):
    read = {}
    # readability stats
    rd = Readability(text)
    read['ari'] = rd.ARI()
    read['flesch_reading_ease'] = rd.FleschReadingEase()
    read['flesch_kincaid_grade_level'] = rd.FleschKincaidGradeLevel()
    read['gunning_fog_index'] = rd.GunningFogIndex()
    read['smog_index'] = rd.SMOGIndex()
    read['coleman_liau_index'] = rd.ColemanLiauIndex()
    read['lix'] = rd.LIX()
    read['rix'] = rd.RIX()
    return read
Example #11
def analyse_document():
    document = sctxt.get(1.0, tk.END)
    read = Readability()
    sentence_count, word_count, syllable_count, index = \
        read.calculate_readability(document)

    global sentence_display
    sentence_display.set("Sentences: " + str(sentence_count))
    global word_display
    word_display.set("Words: " + str(word_count))
    global syllable_display
    syllable_display.set("Syllables: " + str(syllable_count))
    global index_display
    index_display.set("Index: " + "%6.2f" % index)
Example #12
def main():
    path = '/home/shuo/Documents/AI_learning/LearningQ/data/teded/teded_crawled_data/'
    analysis = text_analysis()
    analysis.read_relation(path)
    #analysis.get_mintues()
    analysis.read_videoinfo(path)
    questions = analysis.gather_question()
    question = analysis.video_question
    #for item in question:
    #    print(question[item]['quizzes'][0].keys())
    """
    self.video_question[title]: video_link', 'video_title_length', 'video_description', 'quizzes', 'video_youtube_link
    quizzes: quiz_description', 'question_type', 'quiz_options', 'hint', 'answer'
    multiple-choices open-ended
    """
    scripts = analysis.gather_transcripts(path)
    stats_scripts(scripts)
    temp_dic = analysis.build_question_transcripts(path)

    #analysis.stats_scripts()
    temp = []
    for item in temp_dic:
        for quiz in temp_dic[item]['questions']:
            if quiz['question_type'] == 'multiple-choices':
                temp.append(temp_dic[item])
                break
    q = 0
    for d in temp:
        for question in d['questions']:
            xxx = len(question['quiz_description'].split('.'))
            q += xxx

    nlp = en_core_web_sm.load()
    #n_e=0
    total_r = 0
    n = 0
    for title in scripts:
        #sentences=scripts[title].split('\n')
        #e=NER(sentences,nlp)
        if len(scripts[title].split(' ')) >= 100:
            n += 1
            r = Readability(scripts[title])
            total_r += r.flesch().score
        #n_e+=e
    #print(n_e)
    print(total_r)
    print(n)
    print(total_r / n)
Example #13
def readability(id):
    r = {}
    text = getDocContent(id)
    #print text
    rd = Readability(text)

    r["ARI"] = rd.ARI()
    r["FleschReadingEase"] = rd.FleschReadingEase()
    r["FleschKincaidGradeLevel"] = rd.FleschKincaidGradeLevel()
    r["RIX"] = rd.RIX()
    r["GunningFogIndex"] = rd.GunningFogIndex()
    r["SMOGIndex"] = rd.SMOGIndex()
    r["ColemanLiauIndex"] = rd.ColemanLiauIndex()
    r["LIX"] = rd.LIX()

    return r
Example #14
    def process(self, **kwargs):
        pageContent = kwargs['pageContent']
        url = kwargs['pageBaseUri']

        readability = Readability(pageContent, url)

        return readability.content
Example #15
    def parse(self, response):
        news = Readability(str(response.body.decode('utf8'))).parse()
        if not news['title']:
            print("Could not find the title!", response.url)
        else:
            # get the category that was assigned [politics, economy, society, health, world, technology]
            news_category = response.meta.get('category', 'default')

            output = {**news, "ikon_category": news_category}
            pjoin = os.path.join
            file_path = pjoin('./corpuses_ikon', news_category)
            os.makedirs(file_path, exist_ok=True)
            with open(
                    pjoin(
                        file_path,
                        md5(news['title'].encode('utf-8')).hexdigest() +
                        ".json"), 'w') as outfile:
                json.dump(output, outfile, ensure_ascii=False)

        for next_page in response.xpath("//*[contains(@class, 'nlitem')]//a"):
            yield response.follow(
                next_page,
                self.parse,
                meta={'category': response.meta.get('category', 'default')})

        for next_page in response.xpath(
                "//*[contains(@class, 'ikon-right-dir')]/parent::a"):
            yield response.follow(
                next_page,
                self.parse,
                meta={'category': response.meta.get('category', 'default')})
Example #16
    def analyze_text_by_single_method(self, text: str, method: str, attribute: str) -> Any:
        """
        Given a string representing the text of a document to be considered, :func:`analyze_text_by_single_method`
        processes the text using a single chosen metric method.

        Parameters
        -----------
        text: `str`, required
            The input text to be processed

        method: `str`, required
            Method name

        attribute: `str`, required
            The attribute name (for example, score)

        Returns
        -----------
        The value of the requested attribute for the chosen metric, or "NA" if the metric is unavailable.
        """
        r = Readability(text)
        r_obj = getattr(r, method)()
        if r_obj is None:
            return "NA"
        else:
            return getattr(r_obj, attribute)
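
A hypothetical call to the method above; analyzer stands in for an instance of the enclosing class, and the method and attribute names follow the py-readability-metrics result objects:

# Hypothetical usage; analyzer is an instance of the enclosing class.
text = "..."  # any document long enough for the chosen metric
fk_score = analyzer.analyze_text_by_single_method(text, 'flesch_kincaid', 'score')
fk_grade = analyzer.analyze_text_by_single_method(text, 'flesch_kincaid', 'grade_level')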
Example #17
    def analyze_text_complexity(self, text: str) -> Dict[str, List[Any]]:
        """
        Given a string representing the text of a document to be considered, the :func:`analyze_text_complexity`
        computes several readability-related evaluations.

        Parameters
        -----------
        text: `str`, required
            The input text to be processed

        Returns
        -----------
        The grade levels and scores associated with each metric will be returned.
        """
        output = dict()
        r = Readability(text)
        for met in self.meta.keys():
            try:
                r_obj = getattr(r, met)()
            except Exception:
                r_obj = None
            for attr in self.meta[met]:
                key = "_".join([met, attr])
                if r_obj:
                    output[key] = [getattr(r_obj, attr)]
                else:
                    output[key] = ['NA']

        return output
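
The batch variant above keys its output as '<method>_<attribute>'. A sketch of what self.meta might contain and of the returned shape, assuming the attribute lists from the __init__ in Example #6:

# Hypothetical self.meta mapping, mirroring the attribute lists in Example #6.
meta = {
    'flesch_kincaid': ['score', 'grade_level'],
    'flesch': ['score', 'ease', 'grade_level'],
}
# analyze_text_complexity would then return keys such as:
# {'flesch_kincaid_score': [...], 'flesch_kincaid_grade_level': [...],
#  'flesch_score': [...], 'flesch_ease': [...], 'flesch_grade_level': [...]}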
Example #18
    def graph_readability(self):
        x = pd.to_datetime(self.dataframe.date)
        y = self.dataframe.speech.map(
            lambda u: Readability(u).ari().score)
        z = self.dataframe.speech.map(
            lambda u: Readability(u).flesch().score)
        t = self.dataframe.speech.map(
            lambda u: Readability(u).gunning_fog().score)
        plt.figure()
        plt.plot(x, y, label='Automated Readability Index')
        plt.plot(x, z, label='Flesch Reading Ease')
        plt.plot(x, t, label='Gunning Fog Index')
        plt.xlabel('Year')
        plt.xticks(rotation=90)
        plt.ylabel('Readability')
        plt.legend()
        plt.show()
Example #19
def getReadableArticle(url):
    res = requests.get(url)
    if res.status_code != requests.codes.ok:
        return None
    rawHtml = res.text
    article = Readability(rawHtml,url)
    # if article is not None:
    #     with open(url.split('/')[-1].split('?')[0]+'.html', 'w+') as out:
    #         out.write(article.content)
    return article
Example #20
def __dale_chall(r: Readability) -> float:
    try:
        lvls = r.dale_chall().grade_levels
        if 'college_graduate' in lvls:
            return 17
        elif 'college' in lvls:
            return 13
        else:
            return stat.mean([float(lvl) for lvl in lvls])
    except ReadabilityException:
        return None
Example #21
def check_readability(filename):
    with codecs.open(filename, 'r', 'utf8') as f:
        html = f.read()
    with codecs.open(filename.replace('.html', '.txt'), 'r', 'utf8') as f:
        text = f.read()

    parser = Readability(html)
    article_text = parser.article.get_text()
    rate = distance_rate(article_text, text)
    print(article_text)
    print('rate', rate)
    assert rate > 0.85
Example #22
    def test_smog(self):
        text = """
        “On a June day sometime in the early 1990s, encouraged by his friend and fellow economist Jörgen Weibull, Abhijit went swimming in the Baltic. He leaped in and instantly jumped out—he claims that his teeth continued to chatter for the next three days. In 2018, also in June, we went to the Baltic in Stockholm, several hundred miles farther north than the previous encounter. This time it was literally child’s play; our children frolicked in the water.
        Wherever we went in Sweden, the unusually warm weather was a topic of conversation. It was probably a portent of something everyone felt, but for the moment it was hard not to be quite delighted with the new opportunities for outdoor life it offered.”. 
        """
        text = ' '.join(text for i in range(0, 5))

        readability = Readability(text)

        #Test SMOG with 30 sentences
        r1 = readability.smog()

        #Test SMOG with all sentences
        r2 = readability.smog(all_sentences=True)

        print("all_sentences=False: %s ; all_sentences=True: %s" % (r1, r2))
        self.assertEqual(12.516099999999998, r1.score)
        self.assertEqual('13', r1.grade_level)

        self.assertEqual(12.785403640627713, r2.score)
        self.assertEqual('13', r2.grade_level)
Example #23
def __gunning_fog(r: Readability) -> float:
    try:
        lvl = r.gunning_fog().grade_level
        if lvl == 'college_graduate':
            return 17
        elif lvl == 'college':
            return 13
        elif lvl == 'na':
            return 0
        else:
            return float(lvl)
    except ReadabilityException:
        return None
Example #24
def getReadability():
    authorFileNames = os.listdir(directory)
    texts = []
    authors = []
    truth = {}
    quote = []
    sents = []

    for file in authorFileNames:
        if file.endswith(".xml"):
            te = gettext(file)
            te = te.encode('ascii', 'ignore').decode('ascii')
            texts.append(te)
            authors.append(file[:-4])
        else:
            fgh = open(directory + "/" + file, 'r')
            fg = fgh.read().split('\n')[:-1]
            for r in fg:
                df = r.split(':::')[1:]
                truth[r.split(':::')[0]] = df
            fgh.close()

    f = open('PANreadibility.csv', 'w')
    f.write(
        'ID,Gender,Age,ARI,FleschReadingEase,FleschKincaidGradeLevel,GunningFogIndex,SMOGIndex,ColemanLiauIndex,LIX,RIX\n'
    )
    for i in range(len(authors)):
        sf = texts[i]
        rd = Readability(sf.encode('ascii', 'ignore'))
        f.write(authors[i] + ',' + truth[authors[i]][0] + ',' +
                truth[authors[i]][1] + ',' + str(rd.ARI()) + ',' +
                str(rd.FleschReadingEase()) + ',' +
                str(rd.FleschKincaidGradeLevel()) + ',' +
                str(rd.GunningFogIndex()) + ',' + str(rd.SMOGIndex()) + ',' +
                str(rd.ColemanLiauIndex()) + ',' + str(rd.LIX()) + ',' +
                str(rd.RIX()) + '\n')

    f.close()
Example #25
    def doc_to_readability(doc_str) -> ArrayField:
        if len(doc_str) < 10:
            return ArrayField(np.zeros(7))
        str_to_read = doc_str
        try:
            while len(str_to_read.split()) < 150:
                str_to_read += " " + doc_str
            r = Readability(str_to_read)
            r_scores = [
                r.flesch_kincaid().score,
                r.flesch().score,
                r.gunning_fog().score,
                r.coleman_liau().score,
                r.dale_chall().score,
                r.ari().score,
                r.linsear_write().score
            ]
            return ArrayField(np.array(r_scores))
        except ReadabilityException:
            return ArrayField(np.zeros(7))
Example #26
    def run(self, book, **kwargs):
        doc = book.plaintext
        isbn = 'isbn' in book.metadata and book.metadata['isbn'][0]

        url = 'https://atlas-fab.lexile.com/free/books/' + str(isbn)

        headers = {'accept': 'application/json; version=1.0'}
        lexile = requests.get(url, headers=headers)
        # Checks whether a Lexile entry exists for this ISBN. If it doesn't,
        # the value remains 'None'; the same applies when the entry has no
        # age range, or when there is no ISBN at all.
        if lexile.status_code == 200:
            lexile_work = lexile.json()['data']['work']
            self.lexile_min_age = str(lexile_work['min_age'])
            self.lexile_max_age = str(lexile_work['max_age'])
        try:
            r = Readability(doc)
            fk = r.flesch_kincaid()
            s = r.smog()
            self.readability_fk_score = fk.score
            self.readability_s_score = s.score
        # If less than 100 words
        except ReadabilityException:
            pass
Example #27
def __calculate_sentences_median_grade_level(line: str) -> int:
    line = __fluff_line(line)
    r = Readability(line)
    grade_levels = [
        __ari(r),
        __coleman_liau(r),
        __dale_chall(r),
        __flesch_kincaid(r),
        __gunning_fog(r),
        __linsear_write(r),
        __smog(r),
        __spache(r)]
    grade_levels = [min(17, max(0, x)) for x in grade_levels if x is not None]
    if len(grade_levels) == 0:
        return None
    grade_level = stat.median(grade_levels)
    return round(grade_level)
Example #28
    def parse_article(self, response):
        news = Readability(str(response.body.decode('utf8'))).parse()
        if not news['title']:
            print("Could not find the title!", response.url)
        else:
            # get the category that was assigned [politics, economy, society, health, world, technology]
            news_category = response.meta.get('category', 'default')

            output = {
                **news,
                "ikon_category": news_category
            }
            pjoin = os.path.join
            file_path = pjoin('./corpuses_gogo', news_category)
            os.makedirs(file_path, exist_ok=True)
            with open(pjoin(file_path, md5(news['title'].encode('utf-8')).hexdigest()+".json"), 'w') as outfile:
                json.dump(output, outfile, ensure_ascii=False)
Example #29
def mapfunc(line, path_to_output):
    given_url = line.split(",")[2]

    if given_url.rsplit(".", 1)[-1] in ('pdf', 'jpg', 'jpeg', 'png'):
        print("FileFormatError,", given_url, file=sys.stderr)
        return

    try:
        htmlcode = urllib.request.urlopen(given_url).read().decode()
    except UnicodeDecodeError:
        print("UnicodeDecodeError,", given_url, file=sys.stderr)
        return
    except urllib.error.HTTPError:
        print("urllib.error.HTTPError,", given_url, file=sys.stderr)
        return
    except urllib.error.URLError:
        print("urllib.error.URLError,", given_url, file=sys.stderr)
        return
    except ConnectionResetError:
        print("ConnectionReseterror,", given_url, file=sys.stderr)
        return
    except ssl.CertificateError:
        print("ssl.CertificateError,", given_url, file=sys.stderr)
        return

    try:
        body_html = Readability(htmlcode, given_url).content
    except KeyError:
        print("KeyError,", given_url, file=sys.stderr)
        return

    body_removetag = bs4.BeautifulSoup(body_html, "lxml").text.replace(
        '\n', '').replace(',', '')

    row = [given_url, body_removetag]
    # row = [given_url, body_html, body_removetag]
    # print(",".join(row))

    with open(path_to_output, "a") as output:
        output.write(",".join(row) + "\n")
Example #30


#------------------
# Readability
#------------------

st.header('Readability')

# Context 
passage = st.text_area("Candidate Bible Passage (English)", value='', 
        max_chars=None, key='readability_passage')

# Calculate and display readability on demand; the metrics below require a
# passage of at least 100 words, so only compute once the button is clicked
if st.button('Assess Readability', key=None):
    r = Readability(passage)
    data = [
        ['Flesch-Kincaid Score', r.flesch_kincaid().score],
        ['Flesch Reading Ease', r.flesch().ease],
        ['Dale Chall Readability Score', r.dale_chall().score],
        ['Automated Readability Index Score', r.ari().score],
        ['Coleman Liau Index', r.coleman_liau().score],
        ['Gunning Fog', r.gunning_fog().score],
        ['Linsear Write', r.linsear_write().score],
        ['Spache Readability Formula', r.spache().score],
    ]
    df = pd.DataFrame(data, columns=['Readability Metric', 'Value'])
    st.write(df)
Example #31
    def readability(self, text):
        rd = Readability(text)
        fkg_score = rd.FleschKincaidGradeLevel()
        SMOG = rd.SMOGIndex()
        return fkg_score, SMOG
Example #32
print(cutup_pieces_list)

# Number of possible permutations (different orderings of the list entries
# without repetition): e.g. for A, B, C the permutations are ABC, ACB, BAC,
# BCA, CAB, CBA, i.e. 6 combinations for 3 columns/list entries.
num_combinations = math.factorial(num_columns)

"""Finding all cut-up permutations and storing them in a list."""
permutations_object = itertools.permutations(cutup_pieces_list)  # Find permutations of the list.
permutations_list = list(permutations_object)  # Create a list from the permutations.
permuted_strings_list = ["".join(tup) for tup in permutations_list]
#print(permuted_strings_list)

readability_index_list = []

for i in range(num_combinations):
    r = Readability(permuted_strings_list[i])
    dc = r.dale_chall()
    readability_index_list.append(dc.score)
    #print(dc.grade_levels)

# Find the index of the highest readability score to extract the most readable text.
index = readability_index_list.index(max(readability_index_list))
print(permuted_strings_list[index])
#    print(permuted_strings_list[i])
"""Dale Chall Readability
The Dale-Chall Formula is an accurate readability formula for the simple reason that it is based on the use of
familiar words, rather than syllable or letter counts. Reading tests show that readers usually find it easier 
to read, process and recall a passage if they find the words familiar."""
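
As a quick illustration of the formula described above, a minimal self-contained sketch assuming the py-readability-metrics package (the same dale_chall() call used in the loop); the passage is a placeholder:

# Minimal Dale-Chall sketch; assumes py-readability-metrics is installed
# and the text is at least 100 words long.
from readability import Readability

passage = "Plain familiar words repeated to pass the length check. " * 20
dc = Readability(passage).dale_chall()
print(dc.score)         # raw Dale-Chall score, driven by familiar-word counts
print(dc.grade_levels)  # matching grade bands, e.g. ['college']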


Example #33
def get_smog(text):
	txt = Readability(text)
	try:
		return txt.SMOGIndex()
	except ZeroDivisionError:
		return 0
Example #34
    def parseDocument(self, doc):
        doc = pq(doc)

        wrapparent = self.articleRule.wrapparent
        pageparent = self.articleRule.pageparent
        content_re = ""
        # sub-page URLs
        urls = []

        # text content of the article
        content = ""

        article = doc.find(wrapparent)
        # pages
        if pageparent:
            urls = self.parsePage(article, pageparent)
        # need title, tags
        extrarules = self.articleRule.extrarules

        # only articles have content
        # TODO: some features are still missing here
        if len(extrarules):
            for key, rule, fetch_all, page_type in extrarules:
                field = Field(name=key, rule=rule)
                value = getElementData(doc, rule, self.data["images"], fetch_all)

                self.data[field.get('name')] = field

                if self.is_article_content(field):
                    content_re = field.get("rule")
                    content = value
                elif self.is_gallery_content(field):
                    content_re = field.get("rule")
                    content = []
                    if isinstance(value, list):
                        content += value
                else:
                    field.value = value

        # fetch paginated content
        if len(urls) > 0 and content_re:
            for next_url in urls:
                next_page = Fetch(next_url, charset=self.seed["charset"], timeout=self.seed["timeout"]).read()
                if next_page is not None:
                    next_page = self._getContent(next_page, wrapparent, content_re)
                    if next_page:
                        if isinstance(content, list):
                            content.append(next_page)
                        else:
                            content += next_page

        if content and content_re:
            if isinstance(content, list):
                self.data['content'].value = content
                self.data['images'] += content
            else:
                content = Readability(content, self.url, self.articleRule.filters)
                images = content.getImages()

                self.data['content'].value = content.getContent()
                self.data['images'] += images