def setFileSystemEncoding(self, useLegacyEncoding=False):
    """Optionally switch Windows back to the legacy (MBCS) filesystem encoding.

    Parameters
    ----------
    useLegacyEncoding : bool
        When True, call ``sys._enablelegacywindowsfsencoding()`` (a
        Windows-only CPython API) and print the encoding before and after
        the switch. When False (the default) nothing is changed and a
        notice is printed.
    """
    sysEncoding = sys.getfilesystemencoding()
    if useLegacyEncoding:
        print('Current encoding: ' + str(sysEncoding))
        # Windows-only: reverts to the pre-Python-3.6 MBCS/ANSI encoding.
        sys._enablelegacywindowsfsencoding()
        print('Current encoding: ' + str(sys.getfilesystemencoding()))
    else:
        # Fixed: the original message said "Not implenented for arg->True",
        # but this branch runs when the argument is False (and True *is*
        # implemented above); spelling corrected as well.
        print('setFileSystemEncoding : Not implemented for arg->False')
def files_to_dataframes(strFileStart, nSkip, index="Zeit", seperator="\t", dec=",", encode=None, excel=False, xlsxName="measurements.xlsx", sheetSlice=(0, 10)):
    """Read every file in the running script's directory whose name starts
    with ``strFileStart`` into a pandas DataFrame.

    Parameters
    ----------
    strFileStart : str
        Substring that selects files (must appear in the file name).
    nSkip : int
        Number of leading rows to skip in each file.
    index : str
        Column to use as the DataFrame index (default "Zeit").
    seperator : str
        Field separator passed to ``read_csv`` (name kept for backward
        compatibility with existing callers).
    dec : str
        Decimal marker (default "," for German-style files).
    encode : str or None
        File encoding passed to ``read_csv``.
    excel : bool
        If True, additionally dump all DataFrames into one xlsx workbook.
    xlsxName : str
        Name of the workbook written when ``excel`` is True.
    sheetSlice : tuple of int
        Slice of each file name used as the sheet name. Fixed: was a
        mutable list default, now an equivalent immutable tuple.

    Returns
    -------
    list of [filename, DataFrame] pairs.
    """
    # Allow reading special characters in file names; the API only exists
    # on Windows builds of CPython, so guard the call.
    if sys.platform == 'win32':
        sys._enablelegacywindowsfsencoding()
    # Directory of the currently running script (not necessarily the cwd).
    path = "/".join(sys.argv[0].split("/")[:-1]) + "/"
    # Collect matching regular files in that directory.
    files = [i for i in os.listdir(path)
             if os.path.isfile(os.path.join(path, i)) and strFileStart in i]
    dataframes = []
    for filename in files:
        # Fixed: read from the script's directory, not the cwd — the
        # original passed the bare filename to read_csv, which fails
        # whenever the working directory differs from the script location.
        dataframes.append([
            filename,
            pd.read_csv(os.path.join(path, filename), skiprows=nSkip,
                        sep=seperator, decimal=dec, encoding=encode,
                        index_col=index)
        ])
    # Optionally write all read values into one Excel workbook.
    if excel:
        # Context manager replaces the writer.save() call, which was
        # removed in pandas 2.0.
        with pd.ExcelWriter(xlsxName) as writer:
            for label, df in dataframes:
                df.to_excel(writer, "{}".format(label[sheetSlice[0]:sheetSlice[1]]))
    return dataframes
# NOTE(review): collapsed one-line snippet, preserved byte-for-byte because the
# original indentation was lost and the last definition is truncated. It holds:
# (1) imports — `sys` is imported twice, and `sys._enablelegacywindowsfsencoding()`
#     is called unconditionally, which raises AttributeError on non-Windows
#     builds — presumably Windows-only usage is intended, TODO confirm;
# (2) `add_new_columns_to_layer(layer, column_array)` — for each column name it
#     creates an OGR field: OFTReal with width 6 / precision 3 for names
#     containing "V_", OFTString for names containing CUSEC/NMUN/FECHA,
#     OFTInteger otherwise; fields are added only when `layer` is truthy, and
#     the (possibly modified) layer is returned;
# (3) `add_data_in_new_columns_to_layer(layer, column_array, df)` — truncated
#     mid `while feature:` loop in this view, so its behavior cannot be
#     documented from here.
import traceback from osgeo import ogr import pandas as pd import os import sys sys._enablelegacywindowsfsencoding() import sys def add_new_columns_to_layer(layer, column_array): for col in column_array: if ("V_" in col): new_field = ogr.FieldDefn(col, ogr.OFTReal) new_field.SetWidth(6) new_field.SetPrecision(3) else: if ("CUSEC" in col) or ("NMUN" in col) or ("FECHA" in col): new_field = ogr.FieldDefn(col, ogr.OFTString) else: new_field = ogr.FieldDefn(col, ogr.OFTInteger) if (layer): layer.CreateField(new_field) return layer # Fill the new columns with the vote data def add_data_in_new_columns_to_layer(layer, column_array, df): if (layer): feature = layer.GetNextFeature() while feature:
"""Build a one-second time index for merging, then load measurement data
from an Excel workbook."""
import pandas as pd
from datetime import date, datetime, timedelta, time
from dateutil.relativedelta import relativedelta
import openpyxl as op
import os
import sys

# If your file path includes special chars like æ, ø, å. The API only exists
# on Windows builds of CPython, so guard it (fixed: was called
# unconditionally, which raises AttributeError elsewhere).
if sys.platform == 'win32':
    sys._enablelegacywindowsfsencoding()

### DIRECTORY AND FILENAMES ###
d = 'YOURDIRECTORY'
# Fixed: the original line had an unbalanced extra closing parenthesis,
# which was a syntax error.
fn = os.path.join(d, 'YOURFILE.xlsx')

### CREATE TIMESERIES FOR MERGING ###
# Define starts and stops and interval size
tstart = datetime(2000, 1, 1, 0, 0, 0)
tstop = datetime(2000, 1, 1, 0, 30, 0)  # this is 30 mins
delta = timedelta(seconds=1)

# Make list: one 'HH:MM:SS' string per second in [tstart, tstop).
times = []
while tstart < tstop:
    times.append(tstart.strftime('%H:%M:%S'))
    tstart += delta

# Make df, put times in there, convert t to datetime dtype, set as index
tdf = pd.DataFrame()
tdf['t'] = times
tdf['tdt'] = pd.to_datetime(tdf['t'], format='%H:%M:%S')
tdf = tdf.set_index('tdt')

### IMPORT, CLEAN, AND AGGREGATE DATA ###
full_data = pd.read_excel(fn)
# Extract ID and date, and height and weight, store as values
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Bootstrap and configuration for the CTP trading-API test."""
import sys

sys.path.append(r'D:\Users\Kan\Documents\GitHub\XAPI2\languages\Python')

# Work around Python 3.6 pandas not supporting non-ASCII (Chinese) paths on
# Windows by switching back to the legacy filesystem encoding.
print(sys.getfilesystemencoding())  # show the encoding before the switch
try:
    sys._enablelegacywindowsfsencoding()  # Windows-only CPython API
    print(sys.getfilesystemencoding())  # show the encoding after the switch
except AttributeError:
    # Fixed: was a bare `except:` that swallowed everything; only the
    # missing attribute on non-Windows builds should be silenced here.
    pass

from test_ctp_api import main

# Connection settings: `md` = market data front, `td` = trade front.
configs = {
    'root_dir': r'd:' + '\\test_ctp\\某用户',
    'md': {
        'Address': br'tcp://218.202.237.33:10012',
        'BrokerID': b'9999',
        'UserID': b'654321',
        'Password': b'123456',
    },
    'td': {
        'Address': br'tcp://180.168.146.187:10000',
        'BrokerID': b'9999',
        'UserID': b'654321',
        'Password': b'123456',
    },
}
# NOTE(review): collapsed one-line excerpt of a py2/py3 compatibility module,
# preserved byte-for-byte. The leading `return _rapply(f, xs)` is the tail of
# a function whose definition starts before this view, so it is left as-is.
# Under Python 3 on Windows (>= 3.6) the legacy MBCS/ANSI filesystem encoding
# is re-enabled once, before any fsencode/fsdecode call, so that non-ASCII
# filenames in existing repositories are not corrupted (per the inline
# comments). The remainder binds bytes variants of os path constants
# (curdir, linesep, name, pathsep, pardir, sep, altsep, devnull) and
# sys.platform, each encoded as ASCII; `osaltsep` is encoded only when the
# platform defines an alternate separator.
return _rapply(f, xs) if ispy3: import builtins import codecs import functools import io import struct if os.name == r'nt' and sys.version_info >= (3, 6): # MBCS (or ANSI) filesystem encoding must be used as before. # Otherwise non-ASCII filenames in existing repositories would be # corrupted. # This must be set once prior to any fsencode/fsdecode calls. sys._enablelegacywindowsfsencoding() # pytype: disable=module-attr fsencode = os.fsencode fsdecode = os.fsdecode oscurdir = os.curdir.encode('ascii') oslinesep = os.linesep.encode('ascii') osname = os.name.encode('ascii') ospathsep = os.pathsep.encode('ascii') ospardir = os.pardir.encode('ascii') ossep = os.sep.encode('ascii') osaltsep = os.altsep if osaltsep: osaltsep = osaltsep.encode('ascii') osdevnull = os.devnull.encode('ascii') sysplatform = sys.platform.encode('ascii')
# Depending on setup, it's useful to do this for many files at a time.
# This works for xls and xlsx files and is set up to move files from a folder
# within the directory called 'new' to a folder within the directory called
# 'calc'. I.e. during data collection new files are placed in 'new' folder
# and calculated as required.
# Results are added to a results file which can be used for downstream
# statistical procedures eg.
import pandas as pd
from datetime import date, datetime, timedelta, time
from dateutil.relativedelta import relativedelta
import openpyxl as op
import os
import glob
import shutil
import xlrd
import csv
import sys

sys.__stdout__ = sys.stdout  # This fixes the fileno error you get when importing a file
# Fixed: _enablelegacywindowsfsencoding() exists only on Windows builds of
# CPython; the original unconditional call raises AttributeError elsewhere.
if sys.platform == 'win32':
    sys._enablelegacywindowsfsencoding()  # This fixes the problem with non ascii chr in dir

### CREATE TIMESERIES FOR MERGING ###
# Define starts and stops and interval size
tstart = datetime(2000, 1, 1, 0, 0, 0)
tstop = datetime(2000, 1, 1, 0, 30, 0)
delta = timedelta(seconds=1)

# Make list: one 'HH:MM:SS' string per second in [tstart, tstop).
times = []
while tstart < tstop:
    times.append(tstart.strftime('%H:%M:%S'))
    tstart += delta

# Make df, put times in there, convert t to datetime dtype, set as index
tdf = pd.DataFrame()
tdf['t'] = times
tdf['tdt'] = pd.to_datetime(tdf['t'], format='%H:%M:%S')
# NOTE(review): collapsed one-line excerpt, incomplete at both edges and
# therefore preserved byte-for-byte. The opening sentence is the tail of a
# module docstring whose opening quotes lie before this view, and
# `scrapePlayer`'s body is cut off mid-docstring, so its behavior cannot be
# documented from here. Visible behavior: imports scraping/analysis libraries,
# enables the legacy Windows filesystem encoding so pandas can read file paths
# with accented characters (Windows-only API — the unconditional call would
# raise AttributeError on other platforms, TODO confirm intended platform),
# and defines a browser-like User-Agent header for `requests`.
Simple web-scraper from Transfermarkt for any team's player minutes for a selected season in any competition or all competitions """ #%% Individual Player Match Data import requests from bs4 import BeautifulSoup import pandas as pd import numpy as np import re import networkx as nx import json import itertools import sys sys._enablelegacywindowsfsencoding() #Deal with pandas problem with reading file with accents in file path i.e Alexis Sánchez, Victor Lindelöf headers = {'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.106 Safari/537.36'} def scrapePlayer(pName, link, comp, team, ms, sI, sO, compNames, gN): ''' Function for scraping the information on minutes played for every game of the 17/18 EPL-season for a player in a particular team Parameters: comp: str The competition to scrap data from {"Premier League", "UEFA Champions League", "FA Cup", etc.} *Gotta be precise with this team: str Team of interest {"Man Utd", "Arsenal", etc.}
# NOTE(review): collapsed two-line snippet holding the complete `handle_item`
# bot handler. The original indentation was lost in the collapse, so the exact
# nesting of several statements is ambiguous; the code is preserved
# byte-for-byte rather than reformatted. Visible behavior: extracts an item
# name from `message.text` (note: `re.findall` never returns None, so the
# `is not None` check is always true and the empty-match case is handled via
# the IndexError branch instead), enables the legacy Windows filesystem
# encoding on win32, loads the most recently modified CSV from
# ./scraped_files, fuzzy-matches rows on column "Name1" (similarity >= 0.35
# via an external `similar` helper), assembles the matches into a DataFrame,
# writes and sends iteminfo.csv through the bot, and stores it to a database.
# Depends on external names (`bot`, `similar`, `store_to_db`, `logging`,
# `dt`, `re`, `os`, `sys`, `pd`) not defined in this view; failures inside
# the lookup are reported to the chat and logged rather than raised.
def handle_item(message): _pattern = r'[\w\D]+' _searcheditem = re.findall(_pattern, message.text) if _searcheditem is not None: try: _itemname = _searcheditem[0].split() _searchitem = _itemname[1] except IndexError: bot.send_message(message.chat.id, 'Incorrect item name') return else: _searchitem = '' bot.reply_to( message, "\nSearching info in final table for entered item name: {}\n\n\ 1. Use maximally similar names to item names in tables\n\ 2. This method works only with last final table\n".format(_searchitem)) if sys.platform == 'win32': sys._enablelegacywindowsfsencoding() _path = os.getcwd() _data_path = os.path.join(_path, 'scraped_files') _files = [ os.path.join(_data_path, i) for i in filter(lambda x: x.endswith('.csv'), os.listdir(_data_path)) ] _newest = sorted(_files, key=lambda x: os.path.getmtime(x))[-1] origin_file = pd.read_csv(_newest, sep=',') _index = 0 _index_list = list() _names_first = list() _prices_first = list() _qualities_first = list() _names_second = list() _prices_second = list() _qualities_second = list() _profit = list() _from_to_box = list() _url_first = list() _url_second = list() try: # minimum checker for one column name if "Name1" in origin_file: for item in origin_file["Name1"]: _check = similar(_searchitem, item) if _check >= 0.35: _index_list.append(_index) _names_first.append(item) _prices_first.append( origin_file.loc[origin_file['Name1'] == item, 'Price1'].values[0]) _qualities_first.append( origin_file.loc[origin_file['Name1'] == item, 'Quality1'].values[0]) _names_second.append( origin_file.loc[origin_file['Name1'] == item, 'Name2'].values[0]) _prices_second.append( origin_file.loc[origin_file['Name1'] == item, 'Price2'].values[0]) _qualities_second.append( origin_file.loc[origin_file['Name1'] == item, 'Quality2'].values[0]) _profit.append( origin_file.loc[origin_file['Name1'] == item, 'Profit_1_TO_2'].values[0]) _from_to_box.append( origin_file.loc[origin_file['Name1'] == item, 'FROM_TO'].values[0]) _url_first.append( 
origin_file.loc[origin_file['Name1'] == item, 'URL1'].values[0]) _url_second.append( origin_file.loc[origin_file['Name1'] == item, 'URL2'].values[0]) _index += 1 if len(_index_list) == 0: bot.send_message(message.chat.id, 'No item information found') return _headers = [ 'Index', 'Name1', 'Price1', 'Quality1', 'Name2', 'Price2', 'Quality2', 'Profit_1_TO_2', 'FROM_TO', 'URL1', 'URL2' ] df = pd.DataFrame(list( map( list, zip(_index_list, _names_first, _prices_first, _qualities_first, _names_second, _prices_second, _qualities_second, _profit, _from_to_box, _url_first, _url_second))), columns=_headers) file_name = 'iteminfo.csv' df.to_csv(file_name, index=False) _path_final = os.getcwd() _iteminfo_path = os.path.join(_path_final, file_name) doc = open(_iteminfo_path, 'rb') bot.send_document(message.chat.id, doc) store_to_db(table=_iteminfo_path) except Exception as e: bot.send_message(message.chat.id, 'Can\'t find info for item: {}'.format(_searchitem)) logging.error('{}\tError: {} Can\'t find info for item: {}'.format( dt.datetime.now().strftime('%Y-%m-%d %H:%M:%S'), e, _searchitem))
# NOTE(review): collapsed three-line snippet holding the complete `read_raw`
# EEG-reading pipeline, docstring included. The function is tightly coupled to
# mne and to DiamSar project helpers not visible here (`pth.paths`,
# `get_file`, `read_rej`, `set_or_join_annot`, `get_task_event_id`,
# `current_source_density`, `silent_mne`, `get_ch_pos`), and its statement
# order matters (read -> annotate boundaries -> rej annotations -> event
# fixes -> channel cleanup -> rereference/CSD), so the code is preserved
# byte-for-byte rather than reformatted. Note the win32-only
# `sys._enablelegacywindowsfsencoding()` call applied for study 'B' as a
# temporary workaround for pandas issue #15086 (non-ASCII paths on Windows);
# the original indentation of the two lines of that split FIXME comment was
# lost in the collapse.
def read_raw(fname, study='C', task='rest', space='avg'): ''' Read a raw file and its events using the DiamSar reading pipeline. The DiamSar reading pipeline is following: * read .set file from study-task-specific directory * load annotations from .rej file (if .rej file exists) * if study C -> add Cz reference channel * apply average reference or CSD depending on space kwarg * find events (and update them with information about load for sternberg) Parameters ---------- fname : int or string Name of the file to read. If int it is meant to be a subject identifier. It can also be int-like string for example '003'. study : str Which study to use. Studies are coded with letters in the following fashion: ===== ============ =============== study study letter study directory ===== ============ =============== I A Nowowiejska II B Wronski III C DiamSar IV D PREDiCT V E MODMA ===== ============ =============== Study ``'C'`` is used by default. task : str Task to read, 'rest' or 'sternberg'. 'sternberg' is available only for study C. space : str Data space: average referenced channel space (``"avg"``), current source density (``"csd"``) or DICS beamformer-localized source space (``"src"``). Returns ------- raw : instance of mne.io.Raw Raw data. events : numpy 2d array Events array in mne events format. 
Examples: --------- > raw, events = read_raw(23) > raw_rest, events = read_raw(46, study='B', task='rest') > raw_sternberg, events = read_raw('046', study='C', task='sternberg') ''' # get relevant paths and file names data_path = pth.paths.get_path('eeg', study=study, task=task) set_file, rej_file = get_file(fname, study=study, task=task) # read file # --------- # read eeglab file, ignoring some expected warnings with warnings.catch_warnings(): for msg in warnings_to_ignore_when_reading_files(): warnings.filterwarnings('ignore', msg) # make sure we are compatible with mne 0.17 and mne 0.18: event_id = dict(boundary=999, empty=999) # FIXME - test and switch to new reading if this is the latest mne try: # read data from eeglab .set file raw = mne.io.read_raw_eeglab(op.join(data_path, set_file), event_id=event_id, preload=True, verbose=False) # read events from stim channel events = mne.find_events(raw, verbose=False) except TypeError: # new read_raw_eeglab does not use event_id raw = mne.io.read_raw_eeglab(op.join(data_path, set_file), preload=True, verbose=False) # get events from raw.annotations event_id = get_task_event_id(raw, event_id, study=study, task=task) events = mne.events_from_annotations(raw, event_id=event_id)[0] # special case for PREDiCT data (they don't differentiate # closed eyes start) if study == 'D': from DiamSar.events import translate_events_D events = translate_events_D(events) # FIXME: in 0.17 / 0.18 -> boundary annotations should be already present, # check if this is needed # create annotations from boundary events is_boundary = events[:, -1] == 999 n_boundaries = is_boundary.sum() if n_boundaries > 0: margin = 0.05 onsets = events[is_boundary, 0] / raw.info['sfreq'] - margin duration = np.tile(2 * margin, n_boundaries) description = ['BAD_boundary'] * n_boundaries annot = mne.Annotations(onsets, duration, description) set_or_join_annot(raw, annot) # rej file annotations # -------------------- if rej_file is not None: # FIXME: temporary fix 
for pandas issue #15086 # (check which version introduced the fix) if study == 'B': import sys if sys.platform == 'win32': sys._enablelegacywindowsfsencoding() # read annotations # FIXME: could use encoding='ANSI' for study B? annot = read_rej(op.join(data_path, rej_file), raw.info['sfreq']) # set annotations or add to those already present set_or_join_annot(raw, annot) # task specific event modification - add load info to events if task == 'sternberg': from DiamSar.events import change_events_sternberg events = change_events_sternberg(events) # channel position, reference scheme # ---------------------------------- # drop stim channel and 'oko' channel if present drop_ch_names = ['oko', 'HEOG', 'VEOG', 'EKG', 'CB1', 'CB2'] drop_chan = [ch for ch in raw.ch_names if 'STI' in ch or ch in drop_ch_names] raw.drop_channels(drop_chan) # add original reference channel (Cz) in study C if study == 'C': with silent_mne(): mne.add_reference_channels(raw, 'Cz', copy=False) maxpos = get_ch_pos(raw).max() raw.info['chs'][-1]['loc'][2] = maxpos # make sure channel order is correct, else reorder chan_ord = pth.paths.get_data('chanord', study=study) if not (np.array(chan_ord) == np.array(raw.ch_names)).all(): raw.reorder_channels(chan_ord) # rename 'CZ' to 'Cz' etc. in study D if study == 'D': rename = dict() for ch in raw.ch_names: ch2 = ch if 'Z' in ch2: ch2 = ch2.replace('Z', 'z') if 'FP' in ch2: ch2 = ch2.replace('FP', 'Fp') if not ch == ch2: rename[ch] = ch2 raw.rename_channels(rename) # rereference to average or apply CSD if space == 'avg': raw.set_eeg_reference(verbose=False, projection=False) elif space == 'csd': G, H = pth.paths.get_data('GH', study=study) raw = current_source_density(raw, G, H) return raw, events