def transform(fin, fout, target, thr, drops, verbose=0):
    """Perform transformation on given CSV file.

    Reads the CSV file ``fin`` line by line and writes a new CSV to
    ``fout``. The output keeps every input column except those listed in
    ``drops`` and the ``target`` column itself, and appends a final column
    named ``target`` holding the derived target value.

    :param fin: input CSV file name (opened with ``fopen``)
    :param fout: output CSV file name (opened with ``fopen``)
    :param target: name of the input column the target value is derived from
    :param thr: ``-1`` copies the target column unchanged (regression).
        Otherwise a string: if it matches ``INT_PAT`` or ``FLOAT_PAT`` the
        target becomes 1 when the column value is greater than ``thr`` and
        0 otherwise; any other string is evaluated as a python condition
        that may refer to the ``row`` dict, e.g. ``row['naccess']>10``
    :param drops: column names to leave out of the output; may be empty or
        None, in which case only the target column is removed
    :param verbose: unused, kept for interface compatibility

    Exits the process with status 1 when ``thr`` is a condition that cannot
    be evaluated.
    """
    # A falsy drops must still produce a header and output columns;
    # previously new_headers was only built when drops was non-empty.
    drops = drops or ()
    istream = fopen(fin, 'r')
    ostream = None
    try:
        ostream = fopen(fout, 'wb')
        headers = None
        new_headers = []
        for line in istream:
            fields = line.replace('\n', '').split(',')
            if headers is None:
                # first line is the header row
                headers = fields
                new_headers = [name for name in headers
                               if not (name in drops or name == target)]
                ostream.write(','.join(new_headers) + ',target\n')
                continue
            # NOTE(security): eval() executes whatever expression the CSV
            # cell contains; only run this on trusted input files.
            vals = [eval(v) for v in fields]
            # the local name `row` is part of the contract: user supplied
            # conditions in thr refer to it
            row = dict(zip(headers, vals))
            if thr == -1:
                # keep regression
                tval = row[target]
            elif INT_PAT.match(thr) or FLOAT_PAT.match(thr):
                # do classification against a numeric threshold
                tval = 1 if float(row[target]) > float(thr) else 0
            else:
                # do classification with a user supplied condition
                # NOTE(security): thr is evaluated as python code.
                try:
                    tval = 1 if eval(thr) else 0
                except Exception:
                    print("Please supply valid python condition, e.g. row['naccess']>10 and row['nusers']>5")
                    sys.exit(1)
            new_vals = [str(row[key]) for key in new_headers]
            new_vals.append(str(tval))
            ostream.write(','.join(new_vals) + '\n')
    finally:
        # close both streams even when a row fails to parse or we exit
        istream.close()
        if ostream is not None:
            ostream.close()
def cmssw_test(rserie, rmajor, rminor):
    """Return if provided parameters match CMSSW release schema.

    Each argument is converted with ``str()`` and matched against
    ``INT_PAT`` in order (serie, major, minor); checking stops at the
    first component that does not match.

    Returns the falsy match result of the first failing component, or the
    match result for ``rminor`` when all three components match.
    """
    result = None
    for component in (rserie, rmajor, rminor):
        result = INT_PAT.match(str(component))
        if not result:
            # short-circuit exactly like a chained `and`
            break
    return result
def cmssw_test(rserie, rmajor, rminor):
    """Return if provided parameters match CMSSW release schema.

    The serie, major and minor components are stringified and matched
    against ``INT_PAT`` one after another. The result is the first falsy
    match result encountered, or the match result for ``rminor`` when the
    serie and major components both match.
    """
    serie_match = INT_PAT.match(str(rserie))
    if not serie_match:
        return serie_match
    major_match = INT_PAT.match(str(rmajor))
    if not major_match:
        return major_match
    return INT_PAT.match(str(rminor))
def transform(fin, fout, idcol, target, thr, drops, verbose=0,
              logcols='', logall=False, logbias=2, logthr=None,
              logignore=''):
    """Perform transformation on given CSV file.

    Reads the CSV file ``fin`` line by line and writes a new CSV to
    ``fout`` whose columns are ``id``, then every input column except
    those in ``drops``, ``target`` and ``idcol``, then a final ``target``
    column holding the derived target value.

    :param fin: input CSV file name (opened with ``fopen``)
    :param fout: output CSV file name (opened with ``fopen``)
    :param idcol: name of the input column used as row id; when the input
        has no such column the value returned by ``getuid()`` is used
    :param target: name of the input column the target value is derived from
    :param thr: ``-1`` copies the target column unchanged (regression).
        Otherwise a string: if it matches ``INT_PAT`` or ``FLOAT_PAT`` the
        target becomes 1 when the column value is greater than ``thr`` and
        0 otherwise; any other string is evaluated as a python condition
        that may refer to the ``row`` dict, e.g. ``row['naccess']>10``
    :param drops: column names to leave out of the output; may be empty or
        None
    :param verbose: unused, kept for interface compatibility
    :param logcols: names of output columns to replace by
        ``log(value + logbias)``; a list of names or a comma separated
        string
    :param logall: when true, apply the log transform to every output
        column (except those in ``logignore``)
    :param logbias: value added to a column value before ``log`` is applied
    :param logthr: when set and ``logcols`` is empty, the columns are
        obtained from ``get_log_cols(fin, logthr, logignore)``
    :param logignore: comma separated column names never log transformed

    Raises ``Exception`` when a line needs the fallback parser and its
    number of values differs from the number of headers. Exits the process
    with status 1 when ``thr`` is a condition that cannot be evaluated.
    """
    # A falsy drops must still produce a header and output columns;
    # previously new_headers was only built when drops was non-empty.
    drops = drops or ()
    logignore = logignore.split(',')
    if isinstance(logcols, str):
        # treat a string the same way as logignore; set('a,b') would
        # otherwise yield single characters instead of column names
        logcols = [name for name in logcols.split(',') if name]
    if logthr and not logcols:
        logcols = get_log_cols(fin, logthr, logignore)
    istream = fopen(fin, 'r')
    ostream = None
    try:
        ostream = fopen(fout, 'wb')
        headers = None
        new_headers = []
        log_names = set()
        for line in istream:
            if headers is None:
                # first line is the header row
                headers = line.replace('\n', '').split(',')
                new_headers = [
                    name for name in headers
                    if not (name in drops or name == target or name == idcol)
                ]
                ostream.write('id,' + ','.join(new_headers) + ',target\n')
                # the set of log transformed columns is the same for every
                # row, so work it out once
                if logall:
                    log_names = set(new_headers)
                else:
                    log_names = set(logcols or ())
                log_names -= set(logignore)
                continue
            # NOTE(security): eval() executes whatever expression the CSV
            # cell contains; only run this on trusted input files.
            try:
                item = line.replace('\n', '').replace('<nil>', '-1')
                vals = [eval(v) for v in item.split(',')]
            except Exception as exp:
                print("Unable to parse the line", line, type(line), exp)
                # fallback: parse cell by cell, unparsable cells become -1
                vals = []
                for item in line.split(','):
                    try:
                        vals.append(eval(item))
                    except Exception:
                        vals.append(-1)
                if len(vals) != len(headers):
                    raise Exception(
                        "Unable to parse line '%s', #values != #headers"
                        % line)
            # the local name `row` is part of the contract: user supplied
            # conditions in thr refer to it
            row = dict(zip(headers, vals))
            if thr == -1:
                # keep regression
                tval = row[target]
            elif INT_PAT.match(thr) or FLOAT_PAT.match(thr):
                # do classification against a numeric threshold
                tval = 1 if float(row[target]) > float(thr) else 0
            else:
                # do classification with a user supplied condition
                # NOTE(security): thr is evaluated as python code.
                try:
                    tval = 1 if eval(thr) else 0
                except Exception:
                    print(
                        "Please supply valid python condition, e.g. row['naccess']>10 and row['nusers']>5"
                    )
                    sys.exit(1)
            if idcol in row:
                new_vals = [str(int(row[idcol]))]
            else:
                new_vals = [getuid()]
            for key in new_headers:
                value = row[key]
                if key in log_names:
                    # transform the column's own value; indexing new_vals
                    # by header position was off by one because of the
                    # leading id entry
                    value = log(value + logbias)
                new_vals.append(str(value))
            new_vals.append(str(tval))
            ostream.write(','.join(new_vals) + '\n')
    finally:
        # close both streams even when a row fails to parse or we exit
        istream.close()
        if ostream is not None:
            ostream.close()
def transform(fin, fout, target, thr, drops, verbose=0,
              logcols='', logall=False, logbias=2, logthr=None,
              logignore=''):
    """Perform transformation on given CSV file.

    Reads the CSV file ``fin`` line by line and writes a new CSV to
    ``fout``. The output keeps every input column except those listed in
    ``drops`` and the ``target`` column itself, and appends a final column
    named ``target`` holding the derived target value.

    :param fin: input CSV file name (opened with ``fopen``)
    :param fout: output CSV file name (opened with ``fopen``)
    :param target: name of the input column the target value is derived from
    :param thr: ``-1`` copies the target column unchanged (regression).
        Otherwise a string: if it matches ``INT_PAT`` or ``FLOAT_PAT`` the
        target becomes 1 when the column value is greater than ``thr`` and
        0 otherwise; any other string is evaluated as a python condition
        that may refer to the ``row`` dict, e.g. ``row['naccess']>10``
    :param drops: column names to leave out of the output; may be empty or
        None
    :param verbose: unused, kept for interface compatibility
    :param logcols: names of output columns to replace by
        ``log(value + logbias)``; a list of names or a comma separated
        string
    :param logall: when true, apply the log transform to every output
        column (except those in ``logignore``)
    :param logbias: value added to a column value before ``log`` is applied
    :param logthr: when set and ``logcols`` is empty, the columns are
        obtained from ``get_log_cols(fin, logthr, logignore)``
    :param logignore: comma separated column names never log transformed

    Raises ``Exception`` when a line needs the fallback parser and its
    number of values differs from the number of headers. Exits the process
    with status 1 when ``thr`` is a condition that cannot be evaluated.
    """
    # A falsy drops must still produce a header and output columns;
    # previously new_headers was only built when drops was non-empty.
    drops = drops or ()
    logignore = logignore.split(',')
    if isinstance(logcols, str):
        # treat a string the same way as logignore; set('a,b') would
        # otherwise yield single characters instead of column names
        logcols = [name for name in logcols.split(',') if name]
    if logthr and not logcols:
        logcols = get_log_cols(fin, logthr, logignore)
    istream = fopen(fin, 'r')
    ostream = None
    try:
        ostream = fopen(fout, 'wb')
        headers = None
        new_headers = []
        log_names = set()
        for line in istream:
            if headers is None:
                # first line is the header row
                headers = line.replace('\n', '').split(',')
                new_headers = [name for name in headers
                               if not (name in drops or name == target)]
                ostream.write(','.join(new_headers) + ',target\n')
                # the set of log transformed columns is the same for every
                # row, so work it out once
                if logall:
                    log_names = set(new_headers)
                else:
                    log_names = set(logcols or ())
                log_names -= set(logignore)
                continue
            # NOTE(security): eval() executes whatever expression the CSV
            # cell contains; only run this on trusted input files.
            try:
                item = line.replace('\n', '').replace('<nil>', '-1')
                vals = [eval(v) for v in item.split(',')]
            except Exception as exp:
                print("Unable to parse the line", line, type(line), exp)
                # fallback: parse cell by cell, unparsable cells become -1
                vals = []
                for item in line.split(','):
                    try:
                        vals.append(eval(item))
                    except Exception:
                        vals.append(-1)
                if len(vals) != len(headers):
                    raise Exception(
                        "Unable to parse line '%s', #values != #headers"
                        % line)
            # the local name `row` is part of the contract: user supplied
            # conditions in thr refer to it
            row = dict(zip(headers, vals))
            if thr == -1:
                # keep regression
                tval = row[target]
            elif INT_PAT.match(thr) or FLOAT_PAT.match(thr):
                # do classification against a numeric threshold
                tval = 1 if float(row[target]) > float(thr) else 0
            else:
                # do classification with a user supplied condition
                # NOTE(security): thr is evaluated as python code.
                try:
                    tval = 1 if eval(thr) else 0
                except Exception:
                    print("Please supply valid python condition, e.g. row['naccess']>10 and row['nusers']>5")
                    sys.exit(1)
            new_vals = []
            for key in new_headers:
                value = row[key]
                if key in log_names:
                    # use the parsed value directly instead of
                    # re-evaluating its string form
                    value = log(value + logbias)
                new_vals.append(str(value))
            new_vals.append(str(tval))
            ostream.write(','.join(new_vals) + '\n')
    finally:
        # close both streams even when a row fails to parse or we exit
        istream.close()
        if ostream is not None:
            ostream.close()