Ejemplo n.º 1
0
    def __init__(self, callback):
        """Modal 'Process file' dialog that hosts the wizard pages."""
        tk.Toplevel.__init__(self)
        self.grab_set()  # make the dialog modal
        self.title('Process file')
        self.geometry("%dx%d%+d%+d" % (400, 300, 300, 200))

        host = tk.Frame(self)
        host.pack(side="top", fill="both", expand=True)
        host.grid_rowconfigure(0, weight=1)
        host.grid_columnconfigure(0, weight=1)

        self.callback = callback
        self.processor = DataProcessor()

        self.defaultvar = 'none'
        self.radiovar = tk.StringVar(None, self.defaultvar)

        # Instantiate every wizard page and stack them in the same grid
        # cell; show_frame raises the requested one.
        self.frames = {}
        for page_cls in (PageTwo, PageThree, PageFour, PageFive):
            page = page_cls(parent=host, controller=self)
            self.frames[page_cls.__name__] = page
            page.grid(row=0, column=0, sticky="nsew")

        self.show_frame("PageTwo")
Ejemplo n.º 2
0
    def getResult(self, src, save_image=False):
        """Run OCR on an image file and return the recognized document.

        Parameters
        ----------
        src : str
            Path to the source image.
        save_image : bool
            When True, DataProcessor emits debug images.

        Returns
        -------
        OCRDocument or None
            None when the image cannot be prepared.
        """
        pro = DataProcessor(src, ImageType.RAW, save_image=save_image)
        if pro.prepare() is None:
            logger.error('image error:{0}'.format(src))
            return None
        # Binarize the processed image and hand it to the OCR engine via a
        # temporary file that is removed right after recognition.
        buffer = pro.tobinary(pro.batch())
        temp_file_name = self.create_TemporyFile(buffer, True)

        document = self.ocr.recognize(temp_file_name)
        os.remove(temp_file_name)

        # Append the recognized names to the corpus file, headed by a
        # millisecond-precision timestamp line starting with '#'.
        output = '#' + datetime.now().strftime('%F %T.%f')[:-3] + '\n'
        output += '\n'.join(document.names()) + '\n'
        with Serializer.open_stream('../temp/corpus.txt', mode='a') as file:
            file.write(output)

        # ocr corpus data -> NaiveBayes classifier
        # ranking name swap
        # NOTE(review): the prediction is computed but the name swap below
        # is commented out, so `change` is currently unused.
        change = self.naivebayes.predict_all(document.names())
        #doucument.changeNames(change)

        document.dump()
        return document
Ejemplo n.º 3
0
 def __init__(self, media, target=None):
     """Prepare binary and color images for the given media file."""
     self.media = media
     self.target = target
     processor = DataProcessor(self.media, ImageType.PLAN)
     processor.prepare()
     self.binary = processor.batch()
     self.color = processor.color
     self.drawClipSource = True
     # Debug images land in a scratch directory; create it up front.
     self.imageout_dir = '../temp/trash'
     os.makedirs(self.imageout_dir, exist_ok=True)
Ejemplo n.º 4
0
def main():
    """Scrape wiki data per the configured interval and push it to Azure."""
    config = ConfigParser()
    config.read("config.ini")

    interval = config.getint("main", "interval")
    base_endpoint = config.get("main", "base_endpoint")
    storage_account_conn_str = config.get("azure", "storage_account_conn_str")

    # Pull the current table, scrape, preprocess, then write back.
    azure_client = AzureStorageClient(storage_account_conn_str, 'asdsWiki')
    table_data = azure_client.get_table()

    scrapper = WikiScrapper(base_endpoint)
    dataframe = scrapper.start(table_data, interval)

    processor = DataProcessor()
    processor.preprocess_dataframe(dataframe)
    azure_client.update_by_dataframe(dataframe)
Ejemplo n.º 5
0
def main():
    """Download CPSC recall data and persist it as a timestamped JSON file."""
    ui = UserInterface()
    url_builder = URLBuilder()
    web_dao = WebDAO()
    data_processor = DataProcessor()

    # Build the request URL from the CLI arguments, download the payload,
    # then hand the raw content to the processor.
    url_builder.set_args_dict(ui.get_args_dict()).build()
    web_dao.set_download_url(url_builder.get_url()).download()
    data_processor.set_raw_content(web_dao.get_raw_content())

    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    data_processor.save_json(f"target/cpsc_recalls_{timestamp}.json")
Ejemplo n.º 6
0
from dataprocessor import DataProcessor
from preprocess import clean
from fileio import FileIO
from joblib import load
import numpy as np

# Shared module-level helpers used by predict().
processor = DataProcessor()
# NOTE(review): `io` shadows the stdlib io module for the rest of this
# file; renaming would be safer but the name may be used elsewhere.
io = FileIO()

path_negation_words = 'negation'


def predict(document):
    """Classify a review document as positive, neutral, or negative.

    The document is cleaned, number-normalized, vectorized with the shared
    TF-IDF `processor`, and scored by the persisted model. The verdict is
    printed; nothing is returned.

    Parameters
    ----------
    document : str
        Raw review text.
    """
    doc_clean = clean.clean_review(document)
    doc_clean_process_number = clean.number_process(doc_clean)
    print(doc_clean_process_number)
    tfidf = processor.transform(np.array([doc_clean_process_number]))

    # Load the persisted model on each call.
    model = load("model_test")
    # Fix: the original bound the result to a local named `predict`,
    # shadowing this function's own name.
    prediction = model.predict(tfidf)

    if prediction == "2":
        print("POSITIVE :)")
    elif prediction == "1":
        print("NEUTRAL")
    else:
        print("NEGATIVE -_-")
Ejemplo n.º 7
0
import matplotlib.pyplot as plt
import os
import numpy as np
import folium
import pytz as tz  # better alternatives -> Apache arrow or pendulum
from datetime import datetime
from PIL import Image
import urllib
import urllib.request
import wget
import ray
from scipy.spatial import Voronoi
from numpy import array

# Load the Gowalla check-in data set and run it through the standard
# filter/process pipeline before clustering.
dataprocessor = DataProcessor('/Users/wangyifan/Google Drive/checkin',
                              'loc-gowalla_totalCheckins.txt')

df = dataprocessor.load_date()
df = dataprocessor.data_filter(df)
df = dataprocessor.data_process(df)
# df = df.sample(n=2000, replace=False).reset_index(drop=True)
# config: data 30000 cluster_k: 20
df = df[:30000]  # cap the working set at the first 30k rows
df_kmeans = df.copy()
df_kmeans = df_kmeans[['lat', 'lon']]  # k-means runs on coordinates only
# Clustering hyper-parameters for the k-means run below.
batch_num = 10
cluster_k = 20
epsilon = 1e-4
precision = 1e-6
iteration = 10
"""
Ejemplo n.º 8
0
# Widen pandas display limits so wide frames print fully during debugging.
pd.set_option('display.max_row', 10000)
pd.set_option('display.max_columns', 1000)

if __name__ == '__main__':

    # Read the data set and drop redundant / leaky features.
    data_file = "/Users/salil/Downloads/training_car.csv"
    df_read = pd.read_csv(data_file)
    df_read = df_read.drop([
        'AUCGUART', 'PRIMEUNIT', 'Nationality', 'VNZIP1', 'VNST', 'BYRNO',
        'WheelTypeID', 'PurchDate', 'VehYear'
    ],
                           axis=1)

    data = DataProcessor()
    total_columns = df_read.columns
    catcols, contcols = data.get_cat_cont_cols(df_read, total_columns)
    print("Categorical columns: ", catcols)

    # Remove the row id and the target from the numeric feature list.
    uid = ['RefId']
    target = ['IsBadBuy']
    contcols = list(set(contcols) - set(uid) - set(target))
    features = catcols + contcols
    print("Numerical columns after target and id removal: ", contcols)

    # Fix: the original used chained indexing (df.col[mask] = value),
    # which assigns through a possibly-temporary object and raises
    # SettingWithCopyWarning; .loc assigns reliably in place.
    df_read.loc[df_read.Transmission == 'Manual', 'Transmission'] = 'MANUAL'
    df_read.loc[df_read.Color == 'NOT AVAIL', 'Color'] = 'NA'
    df_read.loc[df_read.Color == 'OTHER', 'Color'] = 'NA'
    df_read.loc[df_read.TopThreeAmericanName == 'OTHER',
                'TopThreeAmericanName'] = 'NA'
Ejemplo n.º 9
0
 def kickoffDataProcessor(self):
     """Run the data processor over the collected results and print them."""
     from dataprocessor import DataProcessor

     processor = DataProcessor(self.results)
     processor.process()
     print("ALL RESULTS: \n" + str(self.results) + "\n")
Ejemplo n.º 10
0
    def initUI(self):
        """Build the main window: File menu, data browser/processor panels,
        image viewer, Mayavi scene, and the overall splitter layout."""
        # File menu actions with shortcuts and icons.
        self.menubar = self.menuBar()
        self.fileMenu = self.menubar.addMenu('&File')
        self.openAct = QAction('&Open', self)
        self.openAct.setShortcut('Ctrl+O')
        self.openAct.setIcon(QIcon("./image/open.ico"))
        self.saveAct = QAction('&Save', self)
        self.saveAct.setShortcut('Ctrl+S')
        self.saveAct.setIcon(QIcon("./image/save.ico"))
        self.importAct = QAction('&Import Data', self)
        self.importAct.setShortcut('Ctrl+I')
        self.importAct.setIcon(QIcon("./image/import.ico"))
        self.exportAct = QAction('&Export Data', self)
        self.exportAct.setShortcut('Ctrl+E')
        self.exportAct.setIcon(QIcon("./image/export.ico"))
        # Export starts disabled — presumably enabled after an import;
        # TODO(review): confirm where it is re-enabled.
        self.exportAct.setEnabled(False)
        self.exitAct = QAction('&Exit', self)
        self.exitAct.setIcon(QIcon("./image/exit.ico"))
        self.fileMenu.addAction(self.openAct)
        self.fileMenu.addAction(self.saveAct)
        self.fileMenu.addSeparator()
        self.fileMenu.addAction(self.importAct)
        self.fileMenu.addAction(self.exportAct)
        self.fileMenu.addSeparator()
        self.fileMenu.addAction(self.exitAct)
        self.importAct.triggered.connect(self.OnImport)
        self.exportAct.triggered.connect(self.OnExport)
        self.exitAct.triggered.connect(self.close)

        # Data browser panel.
        self.DataBrowser = DataBrowser(self)

        # Toggle button that expands/collapses the process region.
        self.PRButton = QPushButton(">")
        self.PRButton.setFixedSize(20, 80)
        self.PRButton.setCheckable(True)
        self.PRButton.toggled.connect(self.showDataProcessor)

        # Data processor panel.
        self.DataProcessor = DataProcessor(self)

        # Image viewer panel.
        self.ImageViewer = ImageViewer(self)

        # Toggle button that expands/collapses the Mayavi 3D region.
        self.MYButton = QPushButton(">")
        self.MYButton.setFixedSize(20, 80)
        self.MYButton.setCheckable(True)
        #self.MYButton.setEnabled(False)
        self.MYButton.toggled.connect(self.show3D)

        # Mayavi scene widget.
        self.MYWidget = MYWidget(self)

        # Assemble the layout; the deferred singleShot nudges the splitter
        # to the browser's minimum width once the event loop is running.
        self.panel, self.splitter, self.Databox, self.DataWidget = self.WinLayout()
        QTimer.singleShot(10, lambda: self.splitter.moveSplitter(self.DataBrowser.minimumWidth(), 1))
        self.splitter.splitterMoved.connect(self.splitterMovedEvent)

        # Central panel hosting the whole layout.
        self.centralPanel = QWidget(self)
        self.centralPanel.setLayout(self.panel)

        self.setCentralWidget(self.centralPanel)
        self.setWindowTitle('ARPES Data Viewer -- By Wei Yao -- Ver 1.0')
        self.show()
        self.initCompleteFlag = True
Ejemplo n.º 11
0
    def initUI(self):
        """Build the stock-analysis UI: a filename label, a text output
        area on the right, a control column on the left, and a borderless
        top-most splash window holding an indeterminate progress bar."""

        self.master.title("Analisa Harga Saham")
        self.pack(fill=BOTH, expand=True)

        self.columnconfigure(1, weight=1)
        self.columnconfigure(3, pad=7)
        self.rowconfigure(3, weight=1)
        self.rowconfigure(5, pad=7)

        self.lbl = Label(self, text="Filename: ")
        self.lbl.grid(sticky=W, pady=4, padx=5, columnspan=4)

        # Right-hand frame with the main text output area.
        self.right_frame = Frame(self, width=800, height=400, borderwidth = 1)
        self.right_frame.grid(row=2, column=1, columnspan=5, rowspan=4,
            padx=5, sticky=E+W+S+N)
        self.right_frame.config(relief=SOLID)
        self.area = Text(self.right_frame, height = 30, width = 40)
        self.area.grid(row=0, column=1,
            padx=5, sticky=W+S+N+E)
        # Borderless, draggable, top-most splash window (hidden until used).
        self.splash = Toplevel(self.right_frame)
        self.splash.overrideredirect(True)
        self.splash.geometry('200x23+100+100')
        self.splash.overrideredirect(1)
        self.splash.bind("<B1-Motion>", self.move_window)
        self.splash.attributes('-topmost', 'true')
        window_height = 23
        window_width = 400

        screen_width = self.splash.winfo_screenwidth()
        screen_height = self.splash.winfo_screenheight()

        # Center the splash on screen.
        x_cordinate = int((screen_width/2) - (window_width/2))
        y_cordinate = int((screen_height/2) - (window_height/2))

        self.splash.geometry("{}x{}+{}+{}".format(window_width, window_height, x_cordinate, y_cordinate))
        self.splash.withdraw()
        pb = Progressbar(self.splash,
                orient=HORIZONTAL,
                length=400)
        pb.config(mode='indeterminate')
        pb.start(10)
        pb.grid(row=1, column=1, sticky=W+E+S+N)
        # self.splash.withdraw()
        self.dp = DataProcessor()
        # upload
        # lblUpload = Label(self, text="upload")
        # lblUpload.grid(row=1, column=0, columnspan=2)
        abtn = Button(self, text="Upload", command=self.openFile)
        abtn.grid(row=1, column=0, sticky=W, padx=5)
        # Left-hand control column.
        self.left_frame = Frame(self, width=200, height=400, borderwidth = 1)
        self.left_frame.grid(row=2, column=0)
        self.left_frame.config(relief=SOLID)
        # self.chkBox = Checkbutton(self.left_frame, text = "All data", variable=self.is_all_data, command=self.cbCallback)
        # self.chkBox.grid(row=1, column=0, sticky=W, padx=5)
        Separator(self.left_frame,orient=HORIZONTAL).grid(row=2, columnspan=1, ipadx=75, padx=5, sticky=W)
        # NOTE(review): this replaces the bound method self.rangeFrame with
        # the Frame it returns, so it can only ever be called once.
        self.rangeFrame = self.rangeFrame() #Frame(self.left_frame, borderwidth = 1)
        self.rangeFrame.grid(row=3, column=0, columnspan=2)

        # Filter button.
        self.btnFilter = Button(self.left_frame, text="Filter", command=self.callFilter)
        self.btnFilter.grid(row=4, column=0, sticky=W, padx=5)
        Separator(self.left_frame,orient=HORIZONTAL).grid(row=5, columnspan=1, ipadx=75, padx=5, sticky=W)

        self.txSaham = Text(self.left_frame)

        # Read-only stock-code combo box.
        self.cbSaham = Combobox(self.left_frame, textvariable=self.selected_saham)
        self.cbSaham['values'] = [] #self.kodeSaham
        self.cbSaham['state'] = 'readonly'  # normal
        self.cbSaham.set('-- Pilih Saham --')
        self.cbSaham.grid(row=7, column=0,padx=5, pady=5)

        # Process button.
        self.btnProses = Button(self.left_frame, text="proses", command=self.callProses)
        self.btnProses.grid(row=8, column=0, sticky=W, padx=5)
        Separator(self.left_frame,orient=HORIZONTAL).grid(row=9, columnspan=1, ipadx=75, padx=5, sticky=W)
Ejemplo n.º 12
0
        # NOTE(review): this fragment begins mid-if; the first branch's
        # condition is outside the visible source.
        from datasources import BPMData
        data_source = BPMData()

    elif bpm_name_parsed == "all":
        from datasources_all import BPMDataAll
        data_source = BPMDataAll()

    else:
        # Single named BPM.
        from datasources_bpm import BPMData
        data_source = BPMData(bpm_name=bpm_name_parsed)

    if data_source is None:
        print("Data source doesn't exists!!! You can't use this program!!!")
        exit()

    # One processor per plane (horizontal X, vertical Z).
    data_proc_X = DataProcessor("X")
    data_proc_Z = DataProcessor("Z")
    settingsControl = SettingsControl()

    mw = MainWindow(data_source, data_proc_X, data_proc_Z, settingsControl, bpm_name_parsed)
    mw.setWindowTitle('BTMS ({})'.format(bpm_name_parsed))

    # Application icon lives next to this script.
    icon_path = os.path.dirname(os.path.abspath(__file__))
    mw_icon = QIcon()
    mw_icon.addFile(os.path.join(icon_path, 'etc/icons/app_icon_color.png'), QSize(32, 32))
    mw.setWindowIcon(mw_icon)

    # Fan incoming data out to the window and to both plane processors.
    data_source.data_ready.connect(mw.on_data1_ready)
    data_source.data_ready.connect(mw.on_data3_ready)
    data_source.data_ready.connect(data_proc_X.on_data_recv)
    data_source.data_ready.connect(data_proc_Z.on_data_recv)
Ejemplo n.º 13
0
    # NOTE(review): tail of init_parser(parser); its def line is outside
    # the visible source. Defaults are Python values, not parsed strings.
    parser.add_argument("--visdom",
                        help="visualize training via visdom_enabled library",
                        default=True)
    parser.add_argument("--gru",
                        help="use GRU units instead of LSTM units",
                        default=False)
    parser.add_argument(
        "--sanitycheck",
        help=
        'list of words for which the nearest word embeddings are found during training, '
        'serves as sanity check, i.e. "dog family king eye"',
        default="dog family king eye")


if __name__ == "__main__":
    # Parse CLI arguments, then train Rec2Vec for a fixed number of epochs.
    parser = argparse.ArgumentParser()
    init_parser(parser)
    args = parser.parse_args()

    data_proc = DataProcessor(args)
    model = Rec2Vec(data_proc)

    # _train returns a byte offset that is fed back in — presumably so each
    # epoch resumes reading where the previous one stopped; TODO confirm.
    bytes_read = 0
    epochs = 30
    for e in range(epochs):
        print(f"Starting epoch: {e}")
        bytes_read = model._train(previously_read=bytes_read, epoch=e)

    # Persist both embedding matrices.
    model.save(f"trained/Trefined_Oembeddings{epochs}.vec",
               model.in_embeddings)
    model.save(f"trained/Trefined_Vembeddings{epochs}.vec", model.v_embeddings)
Ejemplo n.º 14
0
    def __init__(self):
        """Load the Qt UI and wire BPM data/processing signals for the
        frequency-measurement window (one processor per X/Z plane)."""
        super(MainWindow, self).__init__()
        self.ui = uic.loadUi('MainWindow.ui', self)

        self.setWindowTitle("Frequency Measurer")
        self.window_str = "None"
        self.frq_founded = 0.0

        self.buttonExit.clicked.connect(self.on_exit_button)
        self.buttonExit.clicked.connect(QApplication.instance().quit)

        # Data source fans its data_ready signal out to the plots (below)
        # and to both plane processors.
        self.data_source = BPMData(1024, self)
        self.data_source.data_ready.connect(self.on_data1_ready)
        self.data_source.data_ready.connect(self.on_data3_ready)

        self.data_proc_X = DataProcessor("X")
        self.data_proc_Z = DataProcessor("Z")
        self.data_source.data_ready.connect(self.data_proc_X.on_data_recv)
        self.data_source.data_ready.connect(self.data_proc_Z.on_data_recv)
        self.data_proc_X.data_processed.connect(self.on_data2_ready)
        self.data_proc_Z.data_processed.connect(self.on_data4_ready)

        # X-plane control widget -> X processor.
        self.controlWidgetX.window_changed_str.connect(
            self.data_proc_X.on_wind_changed)
        self.controlWidgetX.groupBox.setTitle("Data_X")
        self.controlWidgetX.set_str_id("Data_X")
        self.controlWidgetX.scale_changed_obj.connect(self.on_scale_changing)

        # Z-plane control widget -> Z processor.
        self.controlWidgetZ.window_changed_str.connect(
            self.data_proc_Z.on_wind_changed)
        self.controlWidgetZ.groupBox.setTitle("Data_Z")
        self.controlWidgetZ.set_str_id("Data_Z")
        self.controlWidgetZ.scale_changed_obj.connect(self.on_scale_changing)

        self.controlWidgetX.method_changed_str.connect(
            self.data_proc_X.on_method_changed)
        self.controlWidgetX.boards_changed.connect(
            self.data_proc_X.on_boards_changed)

        self.controlWidgetZ.method_changed_str.connect(
            self.data_proc_Z.on_method_changed)
        self.controlWidgetZ.boards_changed.connect(
            self.data_proc_Z.on_boards_changed)

        # Persisted settings for both control widgets.
        self.settingsControl = SettingsControl()
        self.settingsControl.add_object(self.controlWidgetX)
        self.settingsControl.add_object(self.controlWidgetZ)
        self.buttonRead.clicked.connect(self.on_read_button)
        self.buttonSave.clicked.connect(self.on_save_button)
        self.settingsControl.read_settings()

        self.data_proc_X.data_processed.connect(self.on_freq_status_X)
        self.data_proc_Z.data_processed.connect(self.on_freq_status_Z)

        self.plots_customization()

        self.controlWidgetX.boards_changed.connect(self.boards_X_changed)
        self.controlWidgetZ.boards_changed.connect(self.boards_Z_changed)

        # Signal and FFT curves: red for X, blue for Z.
        self.data_curve1 = self.ui.plotX.plot(pen='r',
                                              title='Generated signal X_plot')
        self.data_curve2 = self.ui.plotFX.plot(
            pen='r', title='Fourier Transform X_plot')
        self.data_curve3 = self.ui.plotZ.plot(pen='b',
                                              title='Generated signal Z_plot')
        self.data_curve4 = self.ui.plotFZ.plot(
            pen='b', title='Fourier Transform Z_plot')
Ejemplo n.º 15
0
 def __init__(self):
     """Set up the Arduino data processor and the odometry publisher."""
     self.arduino_data_processor = DataProcessor(
         "arduino_data", 0.025, 0.210)
     self.pub = Publisher('odom', "odom", "base_link")
Ejemplo n.º 16
0
from encoder.oneplane import OnePlaneEncoder
from encoder.sevenplanes import SevenPlaneEncoder
# import arch
import smallarch as arch
from keras.models import Sequential
from keras.layers.core import Dense
from keras.callbacks import ModelCheckpoint
from keras.utils import to_categorical

# Board geometry: one class per board intersection.
go_board_rows, go_board_cols = 19, 19
num_classes = go_board_rows * go_board_cols
num_games = 100
# encoder = OnePlaneEncoder((go_board_rows, go_board_cols))
encoder = SevenPlaneEncoder((go_board_rows, go_board_cols))

processor = DataProcessor(encoder)

# Stream training positions via a generator instead of materializing them.
generator = processor.load_go_data('train', num_games, use_generator=True)
X = generator.generate(32, num_classes)
print(X)
# test_generator =processor.load_go_data('test', num_games,use_generator=True)

# from split import Splitter
# dir = 'dataset/data'
# splitter = Splitter(data_dir=dir)
# data = splitter.draw_data('train', num_games)
# data_test = splitter.draw_data('test', num_games)

# generator = DataGenerator(dir, data)
# test_generator = DataGenerator(dir,data_test)
Ejemplo n.º 17
0
    def __init__(self, data_files=None, data_processor=None):
        """Set up the model catalogue and hyper-parameter search spaces.

        Parameters
        ----------
        data_files : list of str, optional
            Paths of data set files used to build models. Defaults to an
            empty list.
        data_processor : DataProcessor, optional
            Processor used to split/prepare the data. Defaults to a fresh
            DataProcessor with a 20% test split and a fixed random seed.
        """
        # Fix: the original signature used `data_files=[]` (a shared
        # mutable default) and `data_processor=DataProcessor(...)` (a
        # single instance created at definition time and shared by every
        # caller). None sentinels keep the interface compatible while
        # giving each instance its own defaults.
        self.data_files = [] if data_files is None else data_files
        self.data_processor = (DataProcessor(test_set_proportion=0.20,
                                             random_seed=42)
                               if data_processor is None else data_processor)

        # Constructors for every supported model family.
        self.model_constructors = {
            'decision_tree': DecisionTreeClassifier,
            'random_forest': RandomForestClassifier,
            'naive_bayes': GaussianNB,
            'svc_linear': SVC,
            'svc_polynomial': SVC,
            'svc_rbf': SVC,
            'svc_sigmoid': SVC
        }

        # Per-model fixed constructor kwargs and hyper-parameter search
        # grids (None means no fixed kwargs / nothing to search).
        self.model_params = {
            "decision_tree": {
                "constructor": None,
                "hp_search": {
                    "max_depth": np.arange(1, 100),
                    "min_samples_leaf": [1, 5, 10, 20, 50, 100, 250]
                }
            },
            "random_forest": {
                "constructor": None,
                "hp_search": {
                    "criterion": ["gini", "entropy"],
                    "n_estimators": np.arange(10, 50),
                    "max_depths": np.arange(1, 100),
                    "min_samples_leaf": [1, 5, 10, 20, 50, 100, 250]
                }
            },
            "naive_bayes": {
                "constructor": None,
                "hp_search": None
            },
            "svc_linear": {
                "constructor": {
                    "kernel": "linear"
                },
                "hp_search": {
                    'C': [0.01, 0.1, 1, 10, 100],
                    'gamma':
                    [1, 0.1, 0.01, 0.001, 1e-4, 1e-5, 1e-6, 1e-7, 1e-8, 1e-9],
                }
            },
            "svc_polynomial": {
                "constructor": {
                    "kernel": "poly"
                },
                "hp_search": {
                    'C': [0.01, 0.1, 1, 10, 100],
                    'gamma':
                    [1, 0.1, 0.01, 0.001, 1e-4, 1e-5, 1e-6, 1e-7, 1e-8, 1e-9],
                    'degree':
                    np.arange(3, 10)
                }
            },
            "svc_rbf": {
                "constructor": {
                    "kernel": "rbf"
                },
                "hp_search": {
                    'C': [0.01, 0.1, 1, 10, 100],
                    'gamma':
                    [1, 0.1, 0.01, 0.001, 1e-4, 1e-5, 1e-6, 1e-7, 1e-8, 1e-9],
                }
            },
            "svc_sigmoid": {
                "constructor": {
                    "kernel": "sigmoid"
                },
                "hp_search": {
                    'C': [0.01, 0.1, 1, 10, 100],
                    'gamma':
                    [1, 0.1, 0.01, 0.001, 1e-4, 1e-5, 1e-6, 1e-7, 1e-8, 1e-9],
                }
            },
        }

        # Cache of processed data keyed by file, filled lazily elsewhere.
        self.processed_data_files = {}