def read(self, fn):
    """Read a JSON OIE file and populate ``self.oie``.

    The file maps each sentence to a list of tuples; every tuple becomes
    an Extraction keyed by its sentence in ``self.oie``.
    """
    d = defaultdict(list)  # idiomatic: `list`, not `lambda: []`
    with open(fn) as fin:
        data = json.load(fin)
    for sentence, tuples in data.items():
        for t in tuples:
            # A bare "<be>" predicate marks a copula; otherwise strip the
            # "<be> " prefix from the predicate string.
            if t["pred"].strip() == "<be>":
                rel = "[is]"
            else:
                rel = t["pred"].replace("<be> ", "")
            curExtraction = Extraction(pred=rel,
                                       head_pred_index=None,
                                       sent=sentence,
                                       confidence=1.0,  # fixed confidence for gold tuples
                                       index=None)
            # Add every non-empty argument slot, in canonical order
            # (replaces six copy-pasted `if` statements).
            for key in ("arg0", "arg1", "arg2", "arg3", "temp", "loc"):
                if t[key] != "":
                    curExtraction.addArg(t[key])
            d[sentence].append(curExtraction)
    self.oie = d
def __init__(self, cam_type='zed', realtime=False):
    """Set up the odometry source and extractor for the given camera.

    :param cam_type: 'zed' or 'pg'; selects the GPS fix file and offsets.
    :param realtime: forwarded to State_Transition.
    """
    # BUG FIX: the original compared strings with `is`, which tests object
    # identity and only works by CPython string-interning accident.
    if cam_type == 'zed':
        odom_file = '/export/patraval/robo_car_new_loop_all/zed_front/gps/fix.txt'
        self.initial_offset = 6070
        self.offset_length = 6634
    elif cam_type == 'pg':
        # odom_file = '/export/patraval/robo_car_loop2/pg_cam/gps/fix.txt'
        odom_file = '/export/patraval/robo_loop_pg_only/pg_cam/gps/fix.txt'
        self.initial_offset = 5953
        self.offset_length = 6522
    # NOTE(review): the values below unconditionally override the
    # per-camera offsets chosen above — kept as-is to preserve behavior,
    # but confirm this is intended.
    self.initial_offset = 1600  # pg #1417 #-- zed
    # self.initial_offset = 1948
    self.offset_length = 6000
    # self.initial_offset = 3510
    # self.offset_length = 6634
    print(self.initial_offset, self.offset_length)
    # NOTE(review): `odom_file` is unbound if cam_type is neither 'zed'
    # nor 'pg' — a NameError would follow; confirm callers never do that.
    self.transformer = State_Transition(odom_file, cam_type,
                                        self.initial_offset, realtime)
    self.extractor = Extraction(cam_type)
def read(self, fn):
    """
    Read a tabbed format line
    Each line consists of:
    sent, prob, pred, arg1, arg2, ...

    Populates ``self.oie``: sentence text -> list of Extraction objects.
    """
    d = {}
    ex_index = 0
    with open(fn) as fin:
        for line in fin:
            if not line.strip():
                continue
            data = line.strip().split('\t')
            try:
                text, confidence, rel = data[:3]
            except ValueError:
                # Malformed line with fewer than 3 fields — skip it.
                # (BUG FIX: was a bare `except:`, which also swallowed
                # KeyboardInterrupt / SystemExit.)
                continue
            curExtraction = Extraction(
                pred=rel,
                head_pred_index=None,
                sent=text,
                confidence=float(confidence),
                question_dist="./question_distributions/dist_wh_sbj_obj1.json",
                index=ex_index)
            ex_index += 1
            for arg in data[3:]:
                curExtraction.addArg(arg)
            # Append to this sentence's extraction list.
            d.setdefault(text, []).append(curExtraction)
    self.oie = d
def __init__(self):
    """Initialize the ICP/EKF localization node: state, map, and ROS I/O."""
    self.icp = ICP()
    self.ekf = EKF()
    self.extraction = Extraction()
    # odom robot init states
    self.robot_x = rospy.get_param('/icp/robot_x', 0)
    self.robot_y = rospy.get_param('/icp/robot_y', 0)
    self.robot_theta = rospy.get_param('/icp/robot_theta', 0)
    self.sensor_sta = [self.robot_x, self.robot_y, self.robot_theta]
    self.isFirstScan = True
    self.src_pc = []
    self.tar_pc = []
    # State Vector [x y yaw]
    self.xOdom = np.zeros((3, 1))
    self.xEst = np.zeros((3, 1))
    self.PEst = np.eye(3)
    # map observation
    self.obstacle = []
    # radius
    self.obstacle_r = 10
    # init map
    self.updateMap()
    # ros topic
    # BUG FIX: create all publishers BEFORE subscribing. rospy delivers
    # callbacks on a separate thread, so a scan arriving before the
    # publishers existed would raise AttributeError inside laserCallback.
    self.location_pub = rospy.Publisher('ekf_location', Odometry, queue_size=3)
    self.odom_pub = rospy.Publisher('icp_odom', Odometry, queue_size=3)
    self.odom_broadcaster = tf.TransformBroadcaster()
    self.landMark_pub = rospy.Publisher('/landMarks', MarkerArray, queue_size=1)
    self.laser_sub = rospy.Subscriber('/course_agv/laser/scan', LaserScan, self.laserCallback)
def requires(self):
    """
    Depends on list of Extraction tasks — one per configured URL.
    """
    # Lazily produce one Extraction dependency per URL.
    yield from map(Extraction, self.urls)
def __init__(self, nodeName="slam_ekf"):
    """Initialize the SLAM/EKF localization node state and ROS topics.

    :param nodeName: name passed to the parent node class.
    """
    super(SLAM_Localization, self).__init__(nodeName)
    self.icp = SubICP()
    self.extraction = Extraction()
    self.isFirstScan = True
    self.laser_count = 0
    # interval
    self.laser_interval = 5
    # State Vector [x y yaw].T, column vector.
    # self.xOdom = np.zeros((STATE_SIZE,1))
    self.xEst = np.zeros((STATE_SIZE, 1))
    # Covariance. Initial is very certain.
    self.PEst = np.zeros((STATE_SIZE, STATE_SIZE))
    # landMark Estimation. Like former self.tar_pc
    self.lEst = np.zeros((LM_SIZE, 0))  # lEst should be of 2*N size

    ## localization parameters
    # minimum landmark matches to update.
    self.min_match = int(rospy.get_param('/slam/min_match', 2))
    # minimum number of points for a landmark cluster
    self.extraction.landMark_min_pt = int(
        rospy.get_param('/slam/landMark_min_pt', 1))
    # maximum radius to be identified as landmark
    self.extraction.radius_max_th = float(
        rospy.get_param('/slam/radius_max_th', 0.4))

    # ros topic
    # BUG FIX: subscribe LAST. rospy delivers callbacks on a separate
    # thread, so a scan arriving before the parameters above were set
    # would read unconfigured state.
    self.laser_sub = rospy.Subscriber('/course_agv/laser/scan',
                                      LaserScan, self.laserCallback)
    # self.location_pub = rospy.Publisher('ekf_location',Odometry,queue_size=3)


# NOTE(review): appears to be a module-level constant following the class
# in the original file — confirm placement.
OBSTACLE_RADIUS = 0.35
def setUpClass(cls):
    """Build the task fixtures and seed HDFS with test pages.

    NOTE(review): presumably decorated with @classmethod outside this view.
    """
    cls.extraction_task_1 = Extraction(url=TEST_URL_1)
    cls.extraction_task_2 = Extraction(url=TEST_URL_2)
    cls.saving_task = Saving([TEST_URL_1, TEST_URL_2])
    # Stub the Saving task's input() so it reads the two extraction
    # outputs directly, without running the upstream tasks.
    cls.saving_task.input = lambda: [
        cls.extraction_task_1.output(), cls.extraction_task_2.output()
    ]
    # Wipe the application's HDFS area, recreate the extraction output
    # directory, then upload the two fixture HTML pages into it.
    cls.cmd("hadoop fs -rm -r /{app_name}".format(
        app_name=APPLICATION_NAME).split())
    cls.cmd("hadoop fs -mkdir -p /{app_name}/{ex_out}".format(
        app_name=APPLICATION_NAME, ex_out=EXTRACTION_OUTPUT).split())
    cls.cmd(
        "hadoop fs -put test-resources/https--en.wikipedia.org-wiki-Battle_of_Austerlitz.html /{app_name}/{ex_out}"
        .format(app_name=APPLICATION_NAME, ex_out=EXTRACTION_OUTPUT).split())
    cls.cmd(
        "hadoop fs -put test-resources/https--en.wikipedia.org-wiki-Napoleon.html /{app_name}/{ex_out}"
        .format(app_name=APPLICATION_NAME, ex_out=EXTRACTION_OUTPUT).split())
def GET(self, datafile, method):
    """Returns some extracted information from a file.

    :param datafile: basename of a previously uploaded file in TMP_FOLDER.
    :param method: 'text' | 'file' | 'header' | 'citations' | 'body'
                   | 'keyphrases'; selects what to return.
    Output format is controlled by the ``output`` query param (xml/json).
    """
    params = web.input(output="xml")
    extractor = Extraction()
    data = ''
    txtfile = TMP_FOLDER + datafile + '.txt'
    # Check if the file exists, if not return a 404
    if not os.path.exists(txtfile):
        return web.notfound()
    try:
        if method == 'text':
            txtfile = TMP_FOLDER + datafile + '.txt'
            web.header('Content-Type', 'text/text')  # Set the Header
            # BUG FIX: close the handle (was a leaked open().read()).
            with open(txtfile, "rb") as f:
                return f.read()
        elif method == 'file':
            pdffile = TMP_FOLDER + datafile
            typeFilterStatus = utilities.typeFilter(pdffile)
            web.header('Content-Type', typeFilterStatus)  # Set the Header
            with open(pdffile, "rb") as f:
                return f.read()
        else:
            if method == 'header':
                data = data + extractor.extractHeaders(txtfile)
            elif method == 'citations':
                data = data + extractor.extractCitations(txtfile)
            elif method == 'body':
                data = data + extractor.extractBody(txtfile)
            elif method == 'keyphrases':
                data = data + extractor.extractKeyphrases(txtfile)
            # Print XML or JSON
            if params.output in ('xml', ''):
                web.header('Content-Type', 'text/xml; charset=utf-8')
                return utilities.printXML(data)
            elif params.output == 'json':
                jsondata = xmltodict.parse(data)
                web.header('Content-Type', 'text/json; charset=utf-8')
                return json.dumps(jsondata)
            else:
                web.ctx.status = '400'
                return 'Unsupported output format. Options are: "xml" (default) and "json"'
    except (IOError, OSError) as er:
        # Internal error, i.e. during extraction
        web.debug(er)
        return web.internalerror()
def read(self, fn):
    """Read a tab-separated OIE file: sent \\t pred \\t arg1 \\t arg2 ...

    Populates ``self.oie``: sentence text -> list of Extraction objects,
    using the line number as the extraction index.
    """
    d = defaultdict(list)  # idiomatic: `list`, not `lambda: []`
    with open(fn) as fin:
        for line_ind, line in enumerate(fin):
            data = line.strip().split('\t')
            text, rel = data[:2]
            args = data[2:]
            curExtraction = Extraction(pred=rel,
                                       head_pred_index=None,
                                       sent=text,
                                       confidence=1.0,  # no confidence column in this format
                                       index=line_ind)
            for arg in args:
                curExtraction.addArg(arg)
            d[text].append(curExtraction)
    self.oie = d
def read(self, fn):
    """Parse a tab-separated file into Extraction objects keyed by sentence.

    Expected columns: text, confidence, predicate, arg1, [more args...].
    Lines with fewer than four columns are ignored.
    """
    extractions_by_sent = {}
    with open(fn) as fin:
        for raw_line in fin:
            fields = raw_line.strip().split('\t')
            # Guard clause: skip malformed rows.
            if len(fields) < 4:
                continue
            text, confidence, rel, first_arg = fields[0], fields[1], fields[2], fields[3]
            extraction = Extraction(pred=rel,
                                    head_pred_index=-1,
                                    sent=text,
                                    confidence=float(confidence))
            extraction.addArg(first_arg)
            for extra_arg in fields[4:]:
                extraction.addArg(extra_arg)
            extractions_by_sent.setdefault(text, []).append(extraction)
    self.oie = extractions_by_sent
def __init__(self):
    """Initialize the SLAM node: params, mapping grid, state, and ROS I/O."""
    # ros param
    self.robot_x = rospy.get_param('/slam/robot_x', 0)
    self.robot_y = rospy.get_param('/slam/robot_y', 0)
    self.robot_theta = rospy.get_param('/slam/robot_theta', 0)
    ## ros param of mapping
    self.map_x_width = rospy.get_param('/slam/map_width')
    self.map_y_width = rospy.get_param('/slam/map_height')
    self.map_reso = rospy.get_param('/slam/map_resolution')
    # Grid dimensions in cells (rounded to nearest integer).
    self.map_cellx_width = int(round(self.map_x_width / self.map_reso))
    self.map_celly_width = int(round(self.map_y_width / self.map_reso))
    self.icp = ICP()
    self.ekf = EKF()
    self.extraction = Extraction()
    self.mapping = Mapping(self.map_cellx_width, self.map_celly_width, self.map_reso)
    # odom robot init states
    self.sensor_sta = [self.robot_x, self.robot_y, self.robot_theta]
    self.isFirstScan = True
    self.src_pc = []
    self.tar_pc = []
    # State Vector [x y yaw]
    self.xOdom = np.zeros((STATE_SIZE, 1))
    self.xEst = np.zeros((STATE_SIZE, 1))
    self.PEst = np.eye(STATE_SIZE)
    # map observation
    self.obstacle = []
    # radius
    self.obstacle_r = 10
    # ros topic
    # BUG FIX: create all publishers BEFORE subscribing. rospy delivers
    # callbacks on a separate thread, so a scan arriving before the
    # publishers existed would raise AttributeError inside laserCallback.
    self.location_pub = rospy.Publisher('ekf_location', Odometry, queue_size=3)
    self.odom_pub = rospy.Publisher('icp_odom', Odometry, queue_size=3)
    self.odom_broadcaster = tf.TransformBroadcaster()
    self.landMark_pub = rospy.Publisher('/landMarks', MarkerArray, queue_size=1)
    self.map_pub = rospy.Publisher('/slam_map', OccupancyGrid, queue_size=1)
    self.laser_sub = rospy.Subscriber('/course_agv/laser/scan', LaserScan, self.laserCallback)
def __init__(self, nodeName="ekf_icp"):
    """Initialize the EKF landmark localization node.

    :param nodeName: name passed to the parent node class.
    """
    super(EKF_Landmark_Localization, self).__init__(nodeName)
    self.icp = SubICP()
    self.extraction = Extraction()
    self.src_pc = None
    self.isFirstScan = True
    self.laser_count = 0
    # interval
    self.laser_interval = 5
    # State Vector [x y yaw].T, column vector.
    self.xEst = np.zeros((STATE_SIZE, 1))
    # Covariance. Initial state is certain.
    # self.PEst=np.eye(STATE_SIZE)
    self.PEst = np.zeros((STATE_SIZE, STATE_SIZE))
    # init map
    # map observation
    self.tar_pc = None
    self.updateMap()

    # parameters from launch file.
    # minimum landmark matches to update. Actually even 1 point is acceptable.
    self.min_match = int(rospy.get_param('/localization/min_match', 1))
    # minimum number of points for a landmark cluster
    self.extraction.landMark_min_pt = int(
        rospy.get_param('/localization/landMark_min_pt', 2))
    # maximum radius to be identified as landmark
    self.extraction.radius_max_th = float(
        rospy.get_param('/localization/radius_max_th', 0.4))

    # ros topic
    # BUG FIX: subscribe LAST. rospy delivers callbacks on a separate
    # thread, so a scan arriving before the parameters above were set
    # would read unconfigured state.
    self.laser_sub = rospy.Subscriber('/course_agv/laser/scan',
                                      LaserScan, self.laserCallback)
    # self.location_pub = rospy.Publisher('ekf_location',Odometry,queue_size=3)
from extraction import Extraction import time startTime = time.time() print time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(startTime)) extract1 = Extraction() extract1.loadData() extract1.run() endTime = time.time() print time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(endTime)) seconds = endTime - startTime print seconds m, s = divmod(seconds, 60) h, m = divmod(m, 60) print "%d:%02d:%02d" % (h, m, s)
from flask import Flask, render_template, request, jsonify import sentences_list import random from extraction import Extraction from parssing import Parssing import wikipedia app = Flask(__name__) app.config['SECRET_KEY'] = '7TKUKe09wW1PlrtSL066lsN18uWA7iuO' # Instances creation datas_extraction = Extraction() datas_management = Parssing() @app.route('/') def home(): return render_template('index.html', title="Bienvenue chez GrandPy Bot") @app.route('/_answer') def answer(): question = request.args.get('question', 0, type=str) return jsonify(result=question) @app.route('/_address') def address(): question = request.args.get('question', 0, type=str) filtered_sentence = Parssing.get_main_words(datas_management, question)
set_logger() if EXTRACTION: """ The extraction phase involves the process of obtaining a set of documents from a repository, such as Scopus or the Web of Science, or it can involve the steps of scraping a publisher’s website to retrieve full-text articles (typically in PDF format). Scopus generally provides publication abstracts, including all the meta-data (journal, authors, affiliations, publication date) through various APIs. The upside of using an API is that publication content is easily obtained for a large number of documents simultaneously, however, these APIs often do not provide full-text for all the publications or journals of interest. In these cases, scraping a publisher’s websites can be an alternative solution. This process involves building many handcrafted crawlers, as each publisher lists their publications in a different manner on their website. Download limits should always be respected when building such scripts. Another option would be to manually download articles, although such approaches might not be feasible if the document collection of interest contains thousands or tens of thousands of articles. To enable a comparison of topics by time, or a comparison of topics by journals, it is important to store this information alongside the content of the document. """ # instantiate Extraction class extraction = Extraction() # extract publications from NIPS website extraction.extract_publications() if PREPROCESSING: """ The pre-processing phase can be seen as the process of going from a document source to an interpretable representation for the topic model algorithm. This phase is typically different for full-text and abstract data. One of the main differences is that abstract data is often provided in a clean format, whereas full-text is commonly obtained by converting a PDF document into its plain text representation. 
Within this phase, an important part is to filter out the content that is not important from a topic model's point-of-view, rather than from a human’s point-of-view. Abstract data usually comes in a clean format of around 300--400 words, and little additional text is added to it; typically the copyright statement is the only text that should be removed. In contrast, full-text articles can contain a lot of additional text that has been added by the publisher. This is article meta-data and boilerplate. It is important that such additional text is removed, and various methods to do so exist. Examples include: deleting the first cover page; deleting the first n-bits of the content; using regular expressions or other pattern matching techniques to find and remove additional text, or more advanced methods. For full-text articles, a choice can be made to also exclude the reference
if __name__ == "__main__":
    # Interactive entry point: print usage notes, then loop on a text menu
    # until the user enters 'q'. (Python 2 script: print statement, raw_input.)
    print "This is a script to help extract audio features and to calculate distances between songs with choice parameters."
    print "All data is stored in a database of MongoDB."
    print "The script only works with wave files, being named in a way described later. You can give mp3 or m4a files (ID3 taged!) "
    print "to the script and it will convert them towave files with ffmpeg for you. Make sure ffmpeg and mutagen are installed."
    print "For audio feature extraction, this script is using the bregman audio toolbox and aubio toolbox (for the rhythm) "
    print "which you have to download and install (make sure both work)."
    print "There is also a set of prerequisits for python: you should have installed numpy, etc."  #update all python packages that have to be installed
    print "__________________________________________"
    print "At any time, press q if you want to quit."
    print "__________________________________________"
    y = None
    # Menu loop: each choice constructs the worker object, whose __init__
    # presumably performs the work — TODO confirm against those classes.
    while (y != "q"):
        message = "What do you want to do? File conversion (c), feature extraction (e) or distance calculation(d)? : "
        y = raw_input(message)
        if y == "c":
            c = Conversion()
        elif y == "e":
            e = Extraction()
        elif y == "d":
            d = Distance()
        elif y == "q":
            print "Goodbye!"
        else:
            print "There is no %r option, please try again." %(y)
#!/usr/bin/env python # -*- coding: utf-8 -*- from conversion import Conversion from extraction import Extraction from distance import Distance from sys import argv if __name__ == "__main__": y = argv[1] if "-c" in y: c = Conversion(y) elif "-e" in y: e = Extraction(y) elif y == "-d": d = Distance() else: print "There is no %r option. Try -cmp3 -cm4a -e or -d." % (y)
song_features = db.song_features_collection # #####REPLACE##### distance_features = db.distance_features_collection # #####REPLACE##### #__________________________________________________________________________ options, rem = getopt(argv[1:], 'c:e:d:n:g:h', [ 'conversion=', 'extraction=', 'distance', 'neighbour', 'graphdist', 'help' ]) for opt, arg in options: if opt in ('-c', '--conversion'): from conversion import Conversion c = Conversion(arg, rem[0]) elif opt in ('-e', '--extraction'): from extraction import Extraction e = Extraction(arg, song_features) elif opt in ('-d', '--distance'): from distance import Distance d = Distance(song_features, distance_features) elif opt in ('-n', '--neighbour'): from neighbour import Neighbour n = Neighbour(song_features, distance_features) elif opt in ('-g', '--graphdist'): from graphdist import Graphdist g = Graphdist(song_features, distance_features) elif opt in ('-h', '--help'): print """The following options are available: -c, --conversion mp3/m4a => conversion of mp4 and mp3 files to wave files -e, --extraction