Esempi in Python per DmozParser

Linguaggio di programmazione: Python

Spazio dei nomi/nome del pacchetto: parser

Classe/tipologia: DmozParser

Esempi su hotexamples.com: 6

DmozParser in Python: 6 esempi trovati. Questi sono i migliori esempi reali in Python per parser.DmozParser, estratti da progetti open source. Li puoi valutare, per aiutarci a migliorare la qualità dei nostri esempi.

Metodi utilizzati di frequente

Mostra Nascondi

add_handler(3)

DmozParser(1)

input_path(1)

run(1)

Esempio n. 1

Mostra file

File: sample.py Progetto: munichong/dmoz-parser

#!/usr/bin/env python

from parser import DmozParser
from handlers import JSONWriter

class LawrenceFilter:
    """Handler that records pages filed under the Lawrence, Kansas topic.

    Matching page identifiers are written, one per line, to ``seeds.txt``
    in the current working directory.
    """

    # Topic path fragment a page's topic must contain to be recorded.
    TOPIC_FRAGMENT = 'United_States/Kansas/Localities/L/Lawrence'

    def __init__(self):
        """Open ``seeds.txt`` for writing, truncating any existing file."""
        self._file = open("seeds.txt", 'w')

    def page(self, page, content):
        """Record ``page`` when its topic contains ``TOPIC_FRAGMENT``.

        Args:
            page: Page identifier written to the output file. ``None`` and
                the empty string are ignored.
            content: Mapping that must provide a ``'topic'`` string.

        Raises:
            KeyError: If ``content`` has no ``'topic'`` entry.
        """
        if page is None or page == "":
            return
        topic = content['topic']
        # Plain substring test: the previous ``find(...) > 0`` check
        # silently skipped topics that *start* with the fragment (index 0).
        if self.TOPIC_FRAGMENT in topic:
            self._file.write(page + "\n")
            # Single-argument parenthesised print behaves the same on
            # Python 2 and Python 3.
            print("found page %s in topic %s" % (page, topic))

    def finish(self):
        """Close the output file."""
        self._file.close()


# Build the parser, attach the Lawrence filter and process the dump.
# (JSONWriter('output.json') can be registered instead of the filter.)
parser = DmozParser()
lawrence_handler = LawrenceFilter()
parser.add_handler(lawrence_handler)
parser.run()

Esempio n. 2

Mostra file

File: sample.py Progetto: uofthuangsong/CSC411_project

#!/usr/bin/env python

from parser import DmozParser
from handlers import JSONWriter

# Build the parser, attach a JSON writer targeting output.json, then run.
parser = DmozParser()
json_handler = JSONWriter('output.json')
parser.add_handler(json_handler)
parser.run()

Esempio n. 3

Mostra file

File: sample.py Progetto: not-today/PopStar-and-PopRank

#   <d:Title>Animation World Network</d:Title>
#   <d:Description>Provides information resources to the international animation community. Features include searchable database archives, monthly magazine, web animation guide, the Animation Village, discussion forums and other useful resources.</d:Description>
#   <priority>1</priority>
#   <topic>Top/Arts/Animation</topic>
# </ExternalPage>
# This assumption is strictly checked, and processing will abort if it is violated.
# To use this parser, one should unpack the content.rdf.u8.gz first

class Filter:
    """Handler that records pages whose topic contains a configured category.

    The category is the stripped first line of ``category.txt``; matching
    page identifiers are written, one per line, to ``seeds.txt``. Both files
    are resolved against the current working directory.
    """

    def __init__(self):
        """Open ``seeds.txt`` for writing; the category is loaded on demand."""
        self._file = open("seeds.txt", 'w')
        # Cached category string; None means "not read yet".
        self._category = None

    def _load_category(self):
        """Return the category from ``category.txt``, reading the file once."""
        if self._category is None:
            with open("category.txt") as f:
                self._category = f.readline().strip()
        return self._category

    def page(self, page, content):
        """Record ``page`` when its topic contains the configured category.

        Args:
            page: Page identifier written to the output file. ``None`` and
                the empty string are ignored.
            content: Mapping that must provide a ``'topic'`` string.

        Raises:
            KeyError: If ``content`` has no ``'topic'`` entry.
            IOError: If ``category.txt`` cannot be opened on first use.
        """
        if page is None or page == "":
            return
        topic = content['topic']
        # Read once and reuse: the file used to be re-opened for every page.
        category = self._load_category()
        # Substring test so a topic that starts with the category (index 0)
        # also matches; an empty category still matches nothing, as before.
        if category and category in topic:
            self._file.write(page + "\n")
            # Single-argument parenthesised print behaves the same on
            # Python 2 and Python 3.
            print("found page %s in topic %s" % (page, topic))

    def finish(self):
        """Close the output file."""
        self._file.close()


# Build the parser, attach the category filter and process the dump.
parser = DmozParser()
category_handler = Filter()
parser.add_handler(category_handler)
parser.run()

Esempio n. 4

Mostra file

# <ExternalPage about="http://www.awn.com/">
#   <d:Title>Animation World Network</d:Title>
#   <d:Description>Provides information resources to the international animation community. Features include searchable database archives, monthly magazine, web animation guide, the Animation Village, discussion forums and other useful resources.</d:Description>
#   <priority>1</priority>
#   <topic>Top/Arts/Animation</topic>
# </ExternalPage>
# This assumption is strictly checked, and processing will abort if it is violated.
# To use this parser, one should unpack the content.rdf.u8.gz first


class Filter:
    """Handler that keeps pages whose topic contains a category of interest.

    The category is taken from the first line of ``category.txt`` (with
    surrounding whitespace stripped). Each matching page identifier is
    appended as its own line to ``seeds.txt``.
    """

    def __init__(self):
        """Open ``seeds.txt`` for writing; defer reading the category."""
        self._file = open("seeds.txt", 'w')
        # Category string cached after the first read; None until then.
        self._category = None

    def _load_category(self):
        """Read ``category.txt`` on first use and return the cached value."""
        if self._category is None:
            with open("category.txt") as f:
                self._category = f.readline().strip()
        return self._category

    def page(self, page, content):
        """Write ``page`` to the seeds file if its topic matches.

        Args:
            page: Page identifier to record. ``None`` and ``""`` are skipped.
            content: Mapping that must provide a ``'topic'`` string.

        Raises:
            KeyError: If ``content`` has no ``'topic'`` entry.
            IOError: If ``category.txt`` cannot be opened on first use.
        """
        if page is None or page == "":
            return
        topic = content['topic']
        # The category file was previously re-read for every single page;
        # it is now read once and reused.
        category = self._load_category()
        # ``find(...) > 0`` ignored a match at the very start of the topic;
        # ``in`` accepts it. An empty category keeps matching nothing.
        if category and category in topic:
            self._file.write(page + "\n")
            # Parenthesised form prints identically on Python 2 and 3.
            print("found page %s in topic %s" % (page, topic))

    def finish(self):
        """Close the seeds file."""
        self._file.close()


# Build the parser, attach the category filter and process the dump.
parser = DmozParser()
category_handler = Filter()
parser.add_handler(category_handler)
parser.run()

Esempio n. 5

Mostra file

File: sample.py Progetto: tsa87/newscrawl

#!/usr/bin/env python

from parser import DmozParser
from handlers import JSONWriter


class LawrenceFilter:
    """Handler that records pages filed under venture or financial topics.

    Each match is written to ``seeds.txt`` as ``"<page> <topic>"`` on its
    own line.
    """

    # A page is recorded when its topic contains any of these fragments.
    TOPIC_FRAGMENTS = ('Venture', 'Financial_Services')

    def __init__(self):
        """Open ``seeds.txt`` for writing, truncating any existing file."""
        self._file = open("seeds.txt", 'w')

    def page(self, page, content):
        """Record ``page`` when its topic contains a fragment of interest.

        Args:
            page: Page identifier written to the output file. ``None`` and
                the empty string are ignored.
            content: Mapping that must provide a ``'topic'`` string.

        Raises:
            KeyError: If ``content`` has no ``'topic'`` entry.
        """
        if page is None or page == "":
            return
        topic = content['topic']
        # Substring test: the previous ``find(...) > 0`` checks skipped
        # topics that begin with one of the fragments (index 0).
        if any(fragment in topic for fragment in self.TOPIC_FRAGMENTS):
            self._file.write(page + " " + topic + "\n")
            print("found page %s in topic %s" % (page, topic))

    def finish(self):
        """Close the output file."""
        self._file.close()


# Build the parser, attach the topic filter and process the dump.
# (JSONWriter('output.json') can be registered instead of the filter.)
parser = DmozParser()
topic_handler = LawrenceFilter()
parser.add_handler(topic_handler)
parser.run()

Esempio n. 6

Mostra file

File: writeTaxomie.py Progetto: chris3456/dmoz-parser

#!/usr/bin/env python

import logging

from parser import DmozParser
from handlers import JSONWriter
from handlers import TaxonomieWriter

# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# Configure root logging at INFO; each record shows the timestamp, level,
# and the module, function and line number that emitted it.
log_format = '%(asctime)s : %(levelname)s : %(module)s:%(funcName)s:%(lineno)d : %(message)s'
logging.basicConfig(format=log_format, level=logging.INFO)

# Point the parser at the dump one directory up, attach the taxonomy
# writer targeting output.json, and run.
parser = DmozParser()
parser.input_path = '../content.rdf.u8.gz'
taxonomy_handler = TaxonomieWriter('output.json')
parser.add_handler(taxonomy_handler)
parser.run()