Beispiel #1
0
 def __init__(self, df_data):
     """Keep the input data and initialise every derived result to ``None``.

     Args:
         df_data: Input data, stored unchanged as ``self.df_data``.
     """
     self.df_data = df_data
     # Results that are filled in later; unset until then.
     self.network = self.network_adjusted = self.df_types = None
     self.data_df = self.d_id_title = self.d_title_id = self.views_df = None
     # Collaborators: the extraction helper and a module-named logger.
     self.extract_data = ExtractData()
     self.logger = logging.getLogger(__name__)
    def execute(self):
        """Process every entry found in ``self.INPUT_FOLDER_PATH``.

        Directory entries are reduced to their name without extension,
        sorted and printed. Every name except ``.DS_Store`` is then handed
        to ``PDFtoText(...).execute()``, ``ExtractData(...).execute()`` and
        finally ``self.remove_file(...)``, in that order.
        """
        stems = sorted(
            splitext(entry)[0] for entry in os.listdir(self.INPUT_FOLDER_PATH)
        )

        print(stems)

        for stem in stems:
            # '.DS_Store' has no extension, so splitext leaves it unchanged.
            if stem == '.DS_Store':
                continue
            PDFtoText(stem).execute()
            ExtractData(stem).execute()
            self.remove_file(stem)
Beispiel #3
0
def details():
    """Fetch SMTP settings from the passivereferral API and start extraction.

    Authenticates with the embedded credentials, requests the SMTP
    configuration with the returned token and, when the first entry of the
    response reports ``ssl_enabled == '0'``, builds an ``ExtractData`` from
    that configuration and calls ``convert_into_html()`` on it. Any other
    ``ssl_enabled`` value results in no further action.

    Raises:
        requests.exceptions.RequestException: On a network failure, or when
            either request takes longer than the timeout.
        KeyError: When an expected field is missing from a response.
    """
    # Without an explicit timeout, requests waits indefinitely on an
    # unresponsive server.
    request_timeout = 30

    # NOTE(review): credentials live in source and travel over plain HTTP.
    payload = {
        'email': '*****@*****.**',
        'password': '******'
    }
    auth_response = requests.post(
        'http://api.passivereferral.com/index.php/api/authenticate/',
        json=payload,
        timeout=request_timeout)
    token = auth_response.json()['token']

    url = r'http://api.passivereferral.com/index.php/api/getsmtp/?token=' + token
    smtp_response = requests.get(url, json=payload, timeout=request_timeout)
    # Only the first entry of the returned list is used.
    settings = smtp_response.json()[0]
    user_name = settings['user_name']
    password = settings['password']
    mail_server = settings['smtpname']
    ssl_enabling = settings['ssl_enabled']
    # The SMTP response carries its own token, which replaces the auth token.
    token = settings['token']

    if ssl_enabling == '0':
        # Second and third arguments are fixed to None / False; presumably
        # the port and an SSL flag - TODO confirm against ExtractData.
        extractor = ExtractData(
            mail_server, None, False, user_name, password, token)
        extractor.convert_into_html()
Beispiel #4
0
class FileHandler:
    """Run preprocessing and extraction on a data set and keep the results.

    The result attributes (``network``, ``network_adjusted``, ``df_types``,
    ``data_df``, ``d_id_title``, ``d_title_id``, ``views_df``) are ``None``
    after construction and are filled in by a successful ``process_files``.
    """

    def __init__(self, df_data):
        """Store the input data and initialise all results to ``None``.

        Args:
            df_data: Input data handed to ``DataPreprocessing`` later on.
        """
        self.df_data = df_data
        self.network = None
        self.network_adjusted = None
        self.df_types = None
        self.data_df = None
        self.d_id_title = None
        self.d_title_id = None
        self.views_df = None
        self.extract_data = ExtractData()
        self.logger = logging.getLogger(__name__)

    def organize_data(self):
        """Compute every derived result from ``self.df_data``.

        Returns:
            The 7-tuple ``(network, network_adjusted, df_types, data_df,
            d_id_title, d_title_id, views_df)``, or ``None`` when any step
            raised. The failure is logged together with its traceback.
        """
        self.logger.info("organize data")
        try:
            data_preprocessing = DataPreprocessing(self.df_data)
            network, network_adjusted, df_types = (
                data_preprocessing.preprocessing()
            )
            data_df = self.extract_data.create_final_df(network_adjusted)
            d_id_title, d_title_id = self.extract_data.find_dicts(data_df)
            views_df = self.extract_data.create_views_df(data_df)
        except Exception as err:
            # Best-effort contract: callers receive None instead of the
            # exception, but the error is recorded at ERROR level with a
            # traceback rather than hidden in an INFO message.
            self.logger.exception(
                f"encounter error: {str(err), err.args}")
            return None
        return (network, network_adjusted, df_types, data_df, d_id_title,
                d_title_id, views_df)

    def process_files(self):
        """Run ``organize_data`` and store its results on the instance.

        When ``organize_data`` reports a failure (returns ``None``) the
        result attributes keep their current values and an error is logged,
        instead of failing with an unrelated unpacking ``TypeError``.
        """
        self.logger.info("process_files")
        results = self.organize_data()
        if results is None:
            self.logger.error(
                "process_files aborted: organize_data returned no results")
            return
        (self.network, self.network_adjusted, self.df_types, self.data_df,
         self.d_id_title, self.d_title_id, self.views_df) = results
# -*- coding: utf-8 -*-
from extract_data import ExtractData
from math import degrees
# Run the extractor's error export at import time, before parse() below
# reads "errors.csv".
# NOTE(review): presumably ExtractError() is what writes "errors.csv" -
# confirm against ExtractData.
parser = ExtractData()
parser.ExtractError()


def parse():
    """Read ``errors.csv`` and return its first three columns as floats.

    Every non-blank line is split on commas and its first three fields are
    converted with ``float``. Blank lines (for example a trailing newline at
    the end of the file) are skipped.

    Returns:
        A tuple ``(x, y, angle)`` of three lists of floats, one entry per
        data line, in file order.

    Raises:
        OSError: When ``errors.csv`` cannot be opened.
        ValueError: When one of the first three fields is not a number.
        IndexError: When a data line has fewer than three fields.
    """
    xs, ys, angles = [], [], []
    # The context manager closes the file even when a line fails to parse.
    with open("errors.csv") as error_file:
        for line in error_file:
            if not line.strip():
                continue
            fields = line.split(",")
            xs.append(float(fields[0]))
            ys.append(float(fields[1]))
            angles.append(float(fields[2]))
    return xs, ys, angles


x_error, y_error, angle_error = parse()
import pylab
# Time axis: sample index scaled by 0.2 (labelled in seconds below).
t = [i * 0.2 for i in range(len(x_error))]

# x error in red, y error in green, both against the same time axis.
for _series, _color in ((x_error, 'r'), (y_error, 'g')):
    pylab.plot(t, _series, _color)

# Label the current axes and place the legend near the top-right corner.
ax1 = pylab.gca()
ax1.set_xlabel("Time [s]")
ax1.set_ylabel("Distance error [cm]")
pylab.legend((r'$x$', r'$y$'), shadow=True, loc=(0.84, 0.84))
import sys

from extract_data import ExtractData
from preprocess_data import PreprocessData
from utils import read_json

if __name__ == '__main__':
    # Prefix placed in front of the '*.json' pattern; the empty string means
    # the pattern is resolved against the current working directory.
    # NOTE(review): `glob` is not among the imports visible above - confirm
    # it is imported.
    path_documents = ""
    list_documents = glob.glob(path_documents + '*.json')

    # Path of each document
    for path_doc in list_documents:
        # Load json
        data = read_json(path_doc)
        # object
        obj_data = ExtractData(data)
        # call get_paper_id method

        # call get_title method

        # call get_text method
        # PreprocessData below needs the document text; without this
        # assignment `text` is undefined and the loop raises NameError.
        text = obj_data.get_text()

        # Object to pre-process the text
        obj_preprocess = PreprocessData(text)
        # Convert the text to lower case

        # Remove punctuation

        # Remove numbers

        # Remove stop words
Beispiel #7
0
from preprocess_data import PreprocessData
from utils import read_json
from utils import write_file

# Language code an article must be detected as to be processed.
language = 'en'

if __name__ == '__main__':
    import os  # local import: only the script entry point needs it

    # Directory holding the JSON documents, taken from the first CLI argument.
    path_documents = sys.argv[1]
    # os.path.join adds the separator when the argument lacks a trailing one;
    # plain concatenation turned "docs" into the pattern "docs*.json".
    list_documents = glob.glob(os.path.join(path_documents, '*.json'))
    print("nombre de documents traitées :", len(list_documents))
    # Path of each document
    for path_doc in list_documents:
        # Load json
        data = read_json(path_doc)
        # object
        obj_data = ExtractData(data)
        # call get_text method
        text = obj_data.get_text()

        # make sure the article is in english
        if detect(text) == language:

            # call get_paper_id method
            paper_id = obj_data.get_paper_id()

            # call get_title method
            title = obj_data.get_title()

            # Object to pre-process the text
            obj_preprocess = PreprocessData(text)
            # Convert the text to lower case
from utils import *
from preprocess_data import PreprocessData
from extract_data import ExtractData

# Minimal document fixture used by the tests in this file.
dic = {
    'paper_id': 'ABC00001',
    'metadata': {'title': 'Covid', 'authors': []},
    'body_text': [
        {
            'text': 'Covid is coron virus',
            'cite_spans': [],
            'section': '',
            'ref_spans': [],
        },
    ],
    'ref_entries': {},
    'back_matter': [],
    'bib_entries': {
        'BIBREF0': {
            'title': 'The possible macroeconomic effect on the UK of an influenza pandemic',
            'authors': [],
            'year': 2009,
            'venue': '',
            'volume': '',
            'issn': '',
            'pages': None,
            'other_ids': {'DOI': []},
        },
    },
}
# NOTE(review): absolute path on one developer's machine; presumably the
# read_json test fails wherever this file does not exist.
path_file = "/Users/youssefbencheikh/Desktop/ABC00001.json"
class Test_utils(unittest.TestCase):
    """Checks for the helpers pulled in from ``utils``."""

    def test_isnumber(self):
        """``is_number`` reports True for 12 and False for a word."""
        numeric_outcome = is_number(12)
        self.assertEqual(numeric_outcome, True, "Should be True")
        word_outcome = is_number("Covid")
        self.assertEqual(word_outcome, False, "Should be False")

    def test_read_json(self):
        """``read_json`` on ``path_file`` yields the ``dic`` fixture."""
        loaded = read_json(path_file)
        self.assertEqual(loaded, dic, "Should be a dictionnary")

# Extractor built once from the fixture and shared by the tests below.
data = ExtractData(dic)


class Test_extract_data(unittest.TestCase):
    """Checks for the ``ExtractData`` accessors against the ``dic`` fixture."""

    def test_get_paper_id(self):
        """The paper id comes back as 'ABC00001'."""
        paper_id = data.get_paper_id()
        self.assertEqual(paper_id, 'ABC00001', "Should be 'ABC00001'")

    def test_get_title(self):
        """The title comes back as 'covid'."""
        # NOTE(review): the expected value is lower-case 'covid' while the
        # message and the fixture say 'Covid' - confirm whether get_title
        # lower-cases its result.
        title = data.get_title()
        self.assertEqual(title, 'covid', "Should be Covid")

    def test_get_text(self):
        """The body text comes back unchanged."""
        body = data.get_text()
        self.assertEqual(
            body, 'Covid is coron virus', "Should be Covid is coron virus")


# PreprocessData instance shared by the tests below; they pass their own
# strings to its methods, so the constructor text "test" is not asserted on.
preprocess = PreprocessData("test")

class Test_preprocess_data(unittest.TestCase):
	def test_remove_number(self):
		self.assertEqual(preprocess.remove_number("hello number 1 and 2"), 'hello number and', "Should be 'hello number and'")
	def test_lower_case(self):
Beispiel #9
0
 def requires(self):
     """Return the task(s) the Filter depends on: a new ``ExtractData``."""
     upstream = ExtractData()
     return upstream
 def test_extract_data(self):
     """Run ``ExtractData.extract_data`` on the ICML2011 YAML file.

     The test passes when the call completes without raising; nothing is
     asserted on the result.
     """
     from extract_data import ExtractData
     extractor = ExtractData(yaml_file="../ICML2011.yaml")
     extractor.extract_data()
 def test_extract_data(self):
     """Run ``ExtractData.extract_data`` on the ICML2011 YAML file.

     The test passes when the call completes without raising; nothing is
     asserted on the result.
     """
     # NOTE(review): a method with this exact name is defined directly
     # above; if both belong to one class, this definition replaces it.
     from extract_data import ExtractData
     extractor = ExtractData(yaml_file="../ICML2011.yaml")
     extractor.extract_data()
Beispiel #12
0
# -*- coding: utf-8 -*-
from extract_data import ExtractData
# Run both speed exports at import time, before the CSV files are parsed
# further down.
# NOTE(review): presumably these calls write "plan_speeds.csv" and
# "odom_speeds.csv" - confirm against ExtractData.
parser = ExtractData()
parser.ExtractPlanSpeeds()
parser.ExtractOdomSpeeds()


def parse(filename):
    """Read a two-column CSV file and return both columns as floats.

    Every non-blank line is split on commas and its first two fields are
    converted with ``float``. Blank lines (for example a trailing newline at
    the end of the file) are skipped.

    Args:
        filename: Path of the CSV file to read.

    Returns:
        A tuple ``(v, w)`` of two lists of floats, one entry per data line,
        in file order.

    Raises:
        OSError: When the file cannot be opened.
        ValueError: When one of the first two fields is not a number.
        IndexError: When a data line has fewer than two fields.
    """
    first_column, second_column = [], []
    # The context manager closes the file even when a line fails to parse.
    with open(filename) as speed_file:
        for line in speed_file:
            if not line.strip():
                continue
            fields = line.split(",")
            first_column.append(float(fields[0]))
            second_column.append(float(fields[1]))
    return first_column, second_column


v, w = parse("plan_speeds.csv")
vr, wr = parse("odom_speeds.csv")
import pylab
# Time axis: sample index scaled by 0.2, sized from the plan data.
# NOTE(review): assumes the odometry file has as many rows as the plan file.
t = [i * 0.2 for i in range(len(v))]
# First figure: first column of the plan file against the odometry file.
for _trace in (v, vr):
    pylab.plot(t, _trace)
pylab.grid(True)
# Second figure: second column of both files.
fig = pylab.figure()
for _trace in (w, wr):
    pylab.plot(t, _trace)
pylab.grid(True)
pylab.show()