Python StreamItem.body Exemples

Langage de programmation: Python

Espace de noms/Paquet : streamcorpus

Class/Type: StreamItem

Méthode/Fonction: body

Exemples sur hotexamples.com : 6

Python StreamItem.body - 6 exemples trouvés. Ce sont les exemples réels les mieux notés de streamcorpus.StreamItem.body extraits de projets open source. Vous pouvez noter les exemples pour nous aider à en améliorer la qualité.

Méthodes fréquemment utilisées

Afficher Cacher

StreamItem(4)

body(2)

Méthodes fréquemment utilisées

StreamItem (4)

body (2)

Associées

katz_centrality_numpy

log_request_error

get_bounding_box

rho_D_inv_A

shorten

id

is_equivalent

get_temp_dir

Config

load_predictor

Related in langs

formulateData (PHP)

PaymentsReceived (PHP)

ContributorCalled (C#)

DropDownList_qy (C#)

interpProcessorActive (C++)

s3cfb_earlysuspend_cfg_gpio (C++)

Client (Go)

Min (Go)

SmsBroadcastConfigInfo (Java)

PathAnalysis (Java)

Exemple #1

0

Afficher le fichier

Fichier : test_hyperlink_labels.py Projet : naimdjon/streamcorpus-pipeline

def test_speed(parser_type, test_data_dir):
    """Log how many stream items per second ``hyperlink_labels`` processes.

    Ten ``StreamItem`` objects are built, each with a ``ContentItem`` body
    whose ``clean_html`` holds the content of
    ``test/nytimes-index-clean.html`` under *test_data_dir*.  The timed
    region covers constructing the transform and applying it to every
    item; the resulting rate is logged, not asserted.

    Args:
        parser_type: Value used as the only entry of the transform's
            ``offset_types`` config list.
        test_data_dir: Directory that contains the ``test`` sub-directory
            holding the HTML fixture.
    """
    html_path = os.path.join(test_data_dir, "test", "nytimes-index-clean.html")
    # Read the fixture once, outside the loop, and close the handle
    # deterministically instead of relying on garbage collection.
    with open(html_path) as html_file:
        clean_html = html_file.read()

    stream_items = []
    for _ in range(10):
        stream_item = StreamItem()
        stream_item.body = ContentItem()
        stream_item.body.clean_html = clean_html
        stream_items.append(stream_item)

    context = {}
    # The clock starts before the transform is built, so construction cost
    # is part of the measured time.
    start = time.time()
    hl = hyperlink_labels(
        config={
            "require_abs_url": True,
            "all_domains": False,
            "domain_substrings": ["nytimes.com"],
            "offset_types": [parser_type],
        }
    )
    for si in stream_items:
        hl(si, context)
    elapsed = time.time() - start

    rate = len(stream_items) / elapsed
    logger.debug("OffsetType: {}".format(OffsetType))
    logger.info("{:.1f} per second for {}".format(rate, parser_type))

Exemple #2

0

Afficher le fichier

Fichier : test_hyperlink_labels.py Projet : nithintumma/streamcorpus-pipeline

def test_speed(parser_type):
    """Print how many stream items per second ``hyperlink_labels`` processes.

    Ten ``StreamItem`` objects are built, each with a ``ContentItem`` body
    whose ``clean_html`` holds the content of ``nytimes-index-clean.html``
    located in ``<this file's directory>/_TEST_DATA_ROOT/test``.  A new
    transform is constructed for every item inside the timed loop, so the
    reported rate includes construction cost.

    Args:
        parser_type: Value used as the only entry of the transform's
            ``offset_types`` config list.
    """
    path = os.path.join(os.path.dirname(__file__), _TEST_DATA_ROOT, 'test')
    # Read the fixture once, outside the loop, and close the handle
    # deterministically instead of relying on garbage collection.
    with open(os.path.join(path, 'nytimes-index-clean.html')) as html_file:
        clean_html = html_file.read()

    stream_items = []
    for _ in range(10):
        stream_item = StreamItem()
        stream_item.body = ContentItem()
        stream_item.body.clean_html = clean_html
        stream_items.append(stream_item)

    context = {}
    start = time.time()
    ## run it with a byte state machine
    for si in stream_items:
        # Deliberately build the transform per item: that work is part of
        # what this benchmark has always measured.
        hyperlink_labels(
            {'require_abs_url': True,
             'domain_substrings': ['nytimes.com'],
             'all_domains': False,
             'offset_types': [parser_type]}
        )(si, context)
    elapsed = time.time() - start

    rate = len(stream_items) / elapsed
    # Single-argument parenthesised print produces the same output under
    # the Python 2 print statement and the Python 3 print function.
    print(OffsetType)
    print('\n\n%.1f per second for %s' % (rate, parser_type))

Exemple #3

0

Afficher le fichier

def test_speed(parser_type, test_data_dir):
    """Log how many stream items per second ``hyperlink_labels`` processes.

    Builds ten ``StreamItem`` objects whose ``ContentItem`` body carries the
    content of ``test/nytimes-index-clean.html`` (relative to
    *test_data_dir*) as ``clean_html``, then times constructing the
    transform and running it over all items.  The rate is only logged.

    Args:
        parser_type: Value used as the only entry of the transform's
            ``offset_types`` config list.
        test_data_dir: Directory that contains the ``test`` sub-directory
            holding the HTML fixture.
    """
    path = os.path.join(test_data_dir, 'test')
    # Load the fixture a single time and release the file handle right
    # away; every item can share the same immutable string.
    with open(os.path.join(path, 'nytimes-index-clean.html')) as html_file:
        clean_html = html_file.read()

    stream_items = []
    for _ in range(10):
        stream_item = StreamItem()
        stream_item.body = ContentItem()
        stream_item.body.clean_html = clean_html
        stream_items.append(stream_item)

    context = {}
    # Timing starts before the transform exists, so its construction is
    # included in the elapsed time.
    start = time.time()
    hl = hyperlink_labels(config={
        'require_abs_url': True,
        'all_domains': False,
        'domain_substrings': ['nytimes.com'],
        'offset_types': [parser_type],
    })
    for si in stream_items:
        hl(si, context)
    elapsed = time.time() - start

    rate = len(stream_items) / elapsed
    logger.debug('OffsetType: {}'.format(OffsetType))
    logger.info('{:.1f} per second for {}'.format(rate, parser_type))

Exemple #4

0

Afficher le fichier

Fichier : test_hyperlink_labels.py Projet : naimdjon/streamcorpus-pipeline

def test_long_doc(parser_type, test_data_dir):
    """Run ``hyperlink_labels`` over the ``company-test.html`` fixture.

    A single ``StreamItem`` is given a ``ContentItem`` body whose
    ``clean_html`` is the content of ``test/company-test.html`` under
    *test_data_dir*, and the transform is applied once with all domains
    enabled.  Nothing is asserted; the test passes if no exception is
    raised.

    Args:
        parser_type: Value used as the only entry of the transform's
            ``offset_types`` config list.
        test_data_dir: Directory that contains the ``test`` sub-directory
            holding the HTML fixture.
    """
    stream_item = StreamItem()
    stream_item.body = ContentItem()

    html_path = os.path.join(test_data_dir, "test", "company-test.html")
    # Use a context manager so the fixture's file handle is closed
    # immediately rather than whenever the garbage collector runs.
    with open(html_path) as html_file:
        stream_item.body.clean_html = html_file.read()

    context = {}
    hl = hyperlink_labels(
        config={
            "require_abs_url": True,
            "all_domains": True,
            "offset_types": [parser_type],
        }
    )
    hl(stream_item, context)

Exemple #5

0

Afficher le fichier

def test_long_doc(parser_type, test_data_dir):
    """Apply ``hyperlink_labels`` once to the ``company-test.html`` fixture.

    The fixture at ``test/company-test.html`` (relative to *test_data_dir*)
    becomes the ``clean_html`` of a fresh ``StreamItem`` body, and the
    transform is run with ``all_domains`` enabled.  There are no
    assertions; completing without an exception is the success condition.

    Args:
        parser_type: Value used as the only entry of the transform's
            ``offset_types`` config list.
        test_data_dir: Directory that contains the ``test`` sub-directory
            holding the HTML fixture.
    """
    stream_item = StreamItem()
    stream_item.body = ContentItem()

    path = os.path.join(test_data_dir, 'test')
    # Close the fixture file as soon as it has been read.
    with open(os.path.join(path, 'company-test.html')) as html_file:
        stream_item.body.clean_html = html_file.read()

    context = {}
    hl = hyperlink_labels(config={
        'require_abs_url': True,
        'all_domains': True,
        'offset_types': [parser_type],
    })
    hl(stream_item, context)

Exemple #6

0

Afficher le fichier

Fichier : test_hyperlink_labels.py Projet : nithintumma/streamcorpus-pipeline

def test_long_doc(parser_type):
    """Apply ``hyperlink_labels`` once to the ``company-test.html`` fixture.

    The fixture is read from
    ``<this file's directory>/_TEST_DATA_ROOT/test/company-test.html`` and
    stored as the ``clean_html`` of a fresh ``StreamItem`` body before the
    transform is run with ``all_domains`` enabled.  There are no
    assertions; completing without an exception is the success condition.

    Args:
        parser_type: Value used as the only entry of the transform's
            ``offset_types`` config list.
    """
    stream_item = StreamItem()
    stream_item.body = ContentItem()

    path = os.path.join(os.path.dirname(__file__), _TEST_DATA_ROOT, 'test')
    # Close the fixture file as soon as it has been read instead of
    # leaving the handle to the garbage collector.
    with open(os.path.join(path, 'company-test.html')) as html_file:
        stream_item.body.clean_html = html_file.read()

    context = {}
    ## run it with a byte state machine
    hyperlink_labels(
        {'require_abs_url': True,
         'all_domains': True,
         ## will fail if set to bytes
         'offset_types': [parser_type]}
    )(stream_item, context)