Example #1
    def setFileSystemEncoding(self, useLegacyEncoding=False):
        sysEncoding = sys.getfilesystemencoding()

        if useLegacyEncoding:
            print('Current encoding: ' + str(sysEncoding))
            sys._enablelegacywindowsfsencoding()
            print('Current encoding: ' + str(sys.getfilesystemencoding()))
        else:
            print('setFileSystemEncoding : Not implemented for arg->False')
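The call above exists only on Windows builds of CPython 3.6+. A minimal, portable guard (an assumption, not part of the snippet) looks like this:

import sys

# switch back to the pre-3.6 mbcs filesystem encoding, but only where the
# private API actually exists (Windows, Python >= 3.6)
if sys.platform == 'win32' and hasattr(sys, '_enablelegacywindowsfsencoding'):
    sys._enablelegacywindowsfsencoding()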
Example #2
def files_to_dataframes(strFileStart,
                        nSkip,
                        index="Zeit",
                        seperator="\t",
                        dec=",",
                        encode=None,
                        excel=False,
                        xlsxName="measurements.xlsx",
                        sheetSlice=[0, 10]):
    # enable reading special characters (e.g. umlauts) in file names
    sys._enablelegacywindowsfsencoding()

    files = []
    dataframes = []

    # get path of current script file as string
    path = sys.argv[0]
    path = "/".join(path.split("/")[:-1]) + "/"

    # collect all files in that directory whose names contain strFileStart
    for i in os.listdir(path):
        if os.path.isfile(os.path.join(path, i)) and strFileStart in i:
            files.append(i)

    for filename in files:
        # append the file name and its corresponding dataframe to the list;
        # read from the script's directory rather than the current working dir
        dataframes.append([
            filename,
            pd.read_csv(os.path.join(path, filename),
                        skiprows=nSkip,
                        sep=seperator,
                        decimal=dec,
                        encoding=encode,
                        index_col=index)
        ])

    # optionally write the parsed dataframes to an Excel file
    if excel:
        writer = pd.ExcelWriter(xlsxName)
        for label, df in dataframes:
            # sheet name: a slice of the source file name
            df.to_excel(writer, label[sheetSlice[0]:sheetSlice[1]])
        writer.save()

    return dataframes
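A hypothetical call (the prefix and skip count are made up for illustration):

# read every tab-separated file whose name contains "meas", skipping two
# header rows, and export the results to one workbook
dfs = files_to_dataframes("meas", 2, excel=True)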
Example #3
import traceback
from osgeo import ogr
import pandas as pd
import os
import sys

sys._enablelegacywindowsfsencoding()


def add_new_columns_to_layer(layer, column_array):
    for col in column_array:
        if "V_" in col:
            new_field = ogr.FieldDefn(col, ogr.OFTReal)
            new_field.SetWidth(6)
            new_field.SetPrecision(3)
        elif ("CUSEC" in col) or ("NMUN" in col) or ("FECHA" in col):
            new_field = ogr.FieldDefn(col, ogr.OFTString)
        else:
            new_field = ogr.FieldDefn(col, ogr.OFTInteger)
        if layer:
            layer.CreateField(new_field)
    return layer

    # Fill the new columns with the vote data


def add_data_in_new_columns_to_layer(layer, column_array, df):
    if layer:
        feature = layer.GetNextFeature()
        while feature:
            # the snippet is cut off here; the loop would copy the values
            # from df into the new fields before advancing
            feature = layer.GetNextFeature()
Example #4
import pandas as pd
from datetime import date, datetime, timedelta, time
from dateutil.relativedelta import relativedelta
import openpyxl as op
import os
import sys
sys._enablelegacywindowsfsencoding() #if your file path includes special characters like æ, ø, å

###DIRECTORY AND FILENAMES####
d = 'YOURDIRECTORY'
fn = os.path.join(d, 'YOURFILE.xlsx')

###CREATE TIMESERIES FOR MERGING###
#Define starts and stops and interval size
tstart = datetime(2000, 1, 1, 0, 0, 0) 
tstop = datetime(2000, 1, 1, 0, 30, 0) #this is 30 mins
delta = timedelta(seconds=1)
#Make list
times = []
while tstart < tstop: 
    times.append(tstart.strftime('%H:%M:%S'))
    tstart += delta
#Make df, put times in there, convert t to datetime dtype, set as index
tdf = pd.DataFrame()
tdf['t'] = times
tdf['tdt'] = pd.to_datetime(tdf['t'], format='%H:%M:%S')
tdf = tdf.set_index('tdt')

###IMPORT, CLEAN, AND AGGREGATE DATA###
full_data = pd.read_excel(fn)
#Extract ID and date, and height and weight, store as values
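The snippet is cut off here; a typical continuation (the column name 't' in full_data is an assumption) merges the measurements onto the one-second grid built above:

# align the measurements with the 1 Hz time index via a left join
full_data['tdt'] = pd.to_datetime(full_data['t'], format='%H:%M:%S')
merged = tdf.join(full_data.set_index('tdt'), how='left')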
Example #5
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import sys

sys.path.append(r'D:\Users\Kan\Documents\GitHub\XAPI2\languages\Python')

# Work around pandas on Python 3.6 not supporting Chinese characters in file paths
print(sys.getfilesystemencoding())  # show the encoding before the change
try:
    sys._enablelegacywindowsfsencoding()  # apply the change
    print(sys.getfilesystemencoding())  # show the encoding after the change
except AttributeError:
    # the call exists only on Windows builds of Python 3.6+
    pass

from test_ctp_api import main

configs = {
    'root_dir': r'd:' + '\\test_ctp\\某用户',
    'md': {
        'Address': br'tcp://218.202.237.33:10012',
        'BrokerID': b'9999',
        'UserID': b'654321',
        'Password': b'123456',
    },
    'td': {
        'Address': br'tcp://180.168.146.187:10000',
        'BrokerID': b'9999',
        'UserID': b'654321',
        'Password': b'123456',
    },
}
Example #6
    return _rapply(f, xs)


if ispy3:
    import builtins
    import codecs
    import functools
    import io
    import struct

    if os.name == r'nt' and sys.version_info >= (3, 6):
        # MBCS (or ANSI) filesystem encoding must be used as before.
        # Otherwise non-ASCII filenames in existing repositories would be
        # corrupted.
        # This must be set once prior to any fsencode/fsdecode calls.
        sys._enablelegacywindowsfsencoding()  # pytype: disable=module-attr

    fsencode = os.fsencode
    fsdecode = os.fsdecode
    oscurdir = os.curdir.encode('ascii')
    oslinesep = os.linesep.encode('ascii')
    osname = os.name.encode('ascii')
    ospathsep = os.pathsep.encode('ascii')
    ospardir = os.pardir.encode('ascii')
    ossep = os.sep.encode('ascii')
    osaltsep = os.altsep
    if osaltsep:
        osaltsep = osaltsep.encode('ascii')
    osdevnull = os.devnull.encode('ascii')

    sysplatform = sys.platform.encode('ascii')
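With the legacy encoding enabled, the aliases above round-trip paths through the active filesystem encoding; a small sketch (not part of the original module):

# encode a str path to bytes and back using the filesystem encoding
encoded = fsencode('data.txt')   # b'data.txt'
decoded = fsdecode(encoded)      # 'data.txt'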
Example #7
#Depending on the setup, it is useful to process many files at a time.
#This works for xls and xlsx files and moves files from a folder named 'new' inside the directory to a folder named 'calc': during data collection, new files are placed in 'new' and processed as required.
#Results are appended to a results file that can be used for downstream statistical procedures.

import pandas as pd
from datetime import date, datetime, timedelta, time
from dateutil.relativedelta import relativedelta
import openpyxl as op
import os
import glob
import shutil
import xlrd
import csv
import sys
sys.__stdout__ = sys.stdout #This fixes the fileno error you can get when importing a file
sys._enablelegacywindowsfsencoding() #This fixes problems with non-ASCII characters in the directory path

###CREATE TIMESERIES FOR MERGING###
#Define starts and stops and interval size
tstart = datetime(2000, 1, 1, 0, 0, 0) 
tstop = datetime(2000, 1, 1, 0, 30, 0) 
delta = timedelta(seconds=1)
#Make list
times = []
while tstart < tstop: 
    times.append(tstart.strftime('%H:%M:%S'))
    tstart += delta
#Make df, put times in there, convert t to datetime dtype, set as index
tdf = pd.DataFrame()
tdf['t'] = times
tdf['tdt'] = pd.to_datetime(tdf['t'], format='%H:%M:%S')
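The snippet is cut off before the file-moving step described above; with the glob and shutil imports already present, that step might look like this (folder names taken from the comment):

# move each processed workbook from 'new' to 'calc'
for f in glob.glob(os.path.join('new', '*.xls*')):
    shutil.move(f, os.path.join('calc', os.path.basename(f)))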
Example #8
Simple web scraper of Transfermarkt data for any team's player minutes in a selected season,
in a single competition or across all competitions

"""
#%% Individual Player Match Data
import requests
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np
import re
import networkx as nx
import json
import itertools
import sys
sys._enablelegacywindowsfsencoding() #works around a pandas problem reading files whose paths contain accented characters, e.g. Alexis Sánchez, Victor Lindelöf


headers = {'User-Agent': 
           'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.106 Safari/537.36'} 
    
def scrapePlayer(pName, link, comp, team, ms, sI, sO, compNames, gN):
    '''
    Scrape the information on minutes played in every game of the 17/18 EPL season for a player
    in a particular team.

    Parameters:
            comp: str
                The competition to scrape data from {"Premier League", "UEFA Champions League", "FA Cup", etc.} (must match Transfermarkt's naming exactly)
            team: str
                Team of interest {"Man Utd", "Arsenal", etc.}
Example #9
def handle_item(message):

    _pattern = r'[\w\D]+'
    _searcheditem = re.findall(_pattern, message.text)

    # re.findall returns a (possibly empty) list, never None
    if _searcheditem:
        try:
            _itemname = _searcheditem[0].split()
            _searchitem = _itemname[1]
        except IndexError:
            bot.send_message(message.chat.id, 'Incorrect item name')
            return
    else:
        _searchitem = ''

    bot.reply_to(
        message,
        "\nSearching info in final table for entered item name: {}\n\n\
    1. Use maximally similar names to item names in tables\n\
    2. This method works only with last final table\n".format(_searchitem))

    if sys.platform == 'win32':
        sys._enablelegacywindowsfsencoding()

    _path = os.getcwd()
    _data_path = os.path.join(_path, 'scraped_files')
    _files = [
        os.path.join(_data_path, i)
        for i in filter(lambda x: x.endswith('.csv'), os.listdir(_data_path))
    ]
    _newest = sorted(_files, key=lambda x: os.path.getmtime(x))[-1]
    origin_file = pd.read_csv(_newest, sep=',')

    _index = 0
    _index_list = list()
    _names_first = list()
    _prices_first = list()
    _qualities_first = list()
    _names_second = list()
    _prices_second = list()
    _qualities_second = list()
    _profit = list()
    _from_to_box = list()
    _url_first = list()
    _url_second = list()

    try:
        # minimal sanity check for one expected column name
        if "Name1" in origin_file:
            for item in origin_file["Name1"]:

                _check = similar(_searchitem, item)

                if _check >= 0.35:
                    # fetch the matching row once instead of issuing one
                    # .loc lookup per column
                    row = origin_file.loc[origin_file['Name1'] == item].iloc[0]
                    _index_list.append(_index)
                    _names_first.append(item)
                    _prices_first.append(row['Price1'])
                    _qualities_first.append(row['Quality1'])
                    _names_second.append(row['Name2'])
                    _prices_second.append(row['Price2'])
                    _qualities_second.append(row['Quality2'])
                    _profit.append(row['Profit_1_TO_2'])
                    _from_to_box.append(row['FROM_TO'])
                    _url_first.append(row['URL1'])
                    _url_second.append(row['URL2'])
                    _index += 1

        if len(_index_list) == 0:
            bot.send_message(message.chat.id, 'No item information found')
            return
        _headers = [
            'Index', 'Name1', 'Price1', 'Quality1', 'Name2', 'Price2',
            'Quality2', 'Profit_1_TO_2', 'FROM_TO', 'URL1', 'URL2'
        ]
        df = pd.DataFrame(list(
            map(
                list,
                zip(_index_list, _names_first, _prices_first, _qualities_first,
                    _names_second, _prices_second, _qualities_second, _profit,
                    _from_to_box, _url_first, _url_second))),
                          columns=_headers)
        file_name = 'iteminfo.csv'
        df.to_csv(file_name, index=False)
        _path_final = os.getcwd()
        _iteminfo_path = os.path.join(_path_final, file_name)
        # send the CSV as a document and close the handle afterwards
        with open(_iteminfo_path, 'rb') as doc:
            bot.send_document(message.chat.id, doc)
        store_to_db(table=_iteminfo_path)
    except Exception as e:
        bot.send_message(message.chat.id,
                         'Can\'t find info for item: {}'.format(_searchitem))
        logging.error('{}\tError: {} Can\'t find info for item: {}'.format(
            dt.datetime.now().strftime('%Y-%m-%d %H:%M:%S'), e, _searchitem))
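The helper similar() is not shown in the snippet; a plausible implementation (an assumption, not the author's code) uses difflib, matching the 0.35 threshold above:

import difflib

def similar(a, b):
    # similarity ratio in [0, 1] between the two item names
    return difflib.SequenceMatcher(None, a.lower(), b.lower()).ratio()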
Example #10
def read_raw(fname, study='C', task='rest', space='avg'):
    '''
    Read a raw file and its events using the DiamSar reading pipeline.

    The DiamSar reading pipeline is as follows:
    * read .set file from study-task-specific directory
    * load annotations from .rej file (if .rej file exists)
    * if study C -> add Cz reference channel
    * apply average reference or CSD depending on space kwarg
    * find events (and update them with information about load for sternberg)

    Parameters
    ----------
    fname : int or string
        Name of the file to read. If int it is meant to be a subject
        identifier. It can also be int-like string for example '003'.
    study : str
        Which study to use. Studies are coded with letters in the following
        fashion:

        =====   ============   ===============
        study   study letter   study directory
        =====   ============   ===============
        I       A              Nowowiejska
        II      B              Wronski
        III     C              DiamSar
        IV      D              PREDiCT
        V       E              MODMA
        =====   ============   ===============

        Study ``'C'`` is used by default.
    task : str
        Task to read, 'rest' or 'sternberg'. 'sternberg' is available only for
        study C.
    space : str
        Data space: average referenced channel space (``"avg"``), current
        source density (``"csd"``) or DICS beamformer-localized source space
        (``"src"``).

    Returns
    -------
    raw : instance of mne.io.Raw
        Raw data.
    events : numpy 2d array
        Events array in mne events format.

    Examples
    --------
    >>> raw, events = read_raw(23)
    >>> raw_rest, events = read_raw(46, study='B', task='rest')
    >>> raw_sternberg, events = read_raw('046', study='C', task='sternberg')
    '''

    # get relevant paths and file names
    data_path = pth.paths.get_path('eeg', study=study, task=task)
    set_file, rej_file = get_file(fname, study=study, task=task)

    # read file
    # ---------
    # read eeglab file, ignoring some expected warnings
    with warnings.catch_warnings():
        for msg in warnings_to_ignore_when_reading_files():
            warnings.filterwarnings('ignore', msg)
        # make sure we are compatible with mne 0.17 and mne 0.18:
        event_id = dict(boundary=999, empty=999)

        # FIXME - test and switch to new reading if this is the latest mne
        try:
            # read data from eeglab .set file
            raw = mne.io.read_raw_eeglab(op.join(data_path, set_file),
                                         event_id=event_id, preload=True,
                                         verbose=False)
            # read events from stim channel
            events = mne.find_events(raw, verbose=False)
        except TypeError:
            # new read_raw_eeglab does not use event_id
            raw = mne.io.read_raw_eeglab(op.join(data_path, set_file),
                                         preload=True, verbose=False)
            # get events from raw.annotations
            event_id = get_task_event_id(raw, event_id, study=study,
                                         task=task)
            events = mne.events_from_annotations(raw, event_id=event_id)[0]

    # special case for PREDiCT data (they don't differentiate
    # closed eyes start)
    if study == 'D':
        from DiamSar.events import translate_events_D
        events = translate_events_D(events)

    # FIXME: in 0.17 / 0.18 -> boundary annotations should be already present,
    #        check if this is needed
    # create annotations from boundary events
    is_boundary = events[:, -1] == 999
    n_boundaries = is_boundary.sum()
    if n_boundaries > 0:
        margin = 0.05
        onsets = events[is_boundary, 0] / raw.info['sfreq'] - margin
        duration = np.tile(2 * margin, n_boundaries)
        description = ['BAD_boundary'] * n_boundaries
        annot = mne.Annotations(onsets, duration, description)
        set_or_join_annot(raw, annot)

    # rej file annotations
    # --------------------
    if rej_file is not None:
        # FIXME: temporary fix for pandas issue #15086
        #        (check which version introduced the fix)
        if study == 'B':
            import sys
            if sys.platform == 'win32':
                sys._enablelegacywindowsfsencoding()

        # read annotations
        # FIXME: could use encoding='ANSI' for study B?
        annot = read_rej(op.join(data_path, rej_file), raw.info['sfreq'])

        # set annotations or add to those already present
        set_or_join_annot(raw, annot)

    # task specific event modification - add load info to events
    if task == 'sternberg':
        from DiamSar.events import change_events_sternberg
        events = change_events_sternberg(events)

    # channel position, reference scheme
    # ----------------------------------

    # drop stim channel and 'oko' channel if present
    drop_ch_names = ['oko', 'HEOG', 'VEOG', 'EKG', 'CB1', 'CB2']
    drop_chan = [ch for ch in raw.ch_names
                 if 'STI' in ch or ch in drop_ch_names]
    raw.drop_channels(drop_chan)

    # add original reference channel (Cz) in study C
    if study == 'C':
        with silent_mne():
            mne.add_reference_channels(raw, 'Cz', copy=False)
            maxpos = get_ch_pos(raw).max()
            raw.info['chs'][-1]['loc'][2] = maxpos

    # make sure channel order is correct, else reorder
    chan_ord = pth.paths.get_data('chanord', study=study)
    if not (np.array(chan_ord) == np.array(raw.ch_names)).all():
        raw.reorder_channels(chan_ord)

    # rename 'CZ' to 'Cz' etc. in study D
    if study == 'D':
        rename = dict()
        for ch in raw.ch_names:
            ch2 = ch
            if 'Z' in ch2:
                ch2 = ch2.replace('Z', 'z')
            if 'FP' in ch2:
                ch2 = ch2.replace('FP', 'Fp')
            if not ch == ch2:
                rename[ch] = ch2
        raw.rename_channels(rename)

    # rereference to average or apply CSD
    if space == 'avg':
        raw.set_eeg_reference(verbose=False, projection=False)
    elif space == 'csd':
        G, H = pth.paths.get_data('GH', study=study)
        raw = current_source_density(raw, G, H)

    return raw, events