Example #1
0
def CombineFeatureLabel(f_feature, f_label, f_out):
	"""Join each feature shard with the label file, writing one output shard
	per worker process via _CombineFeatureLabel."""
	n_shards = com.__n_process
	worker_pool = mp.Pool(n_shards)

	in_base = util.file_basename(f_feature)
	out_base = util.file_basename(f_out)
	feature_files = ['%s.%d.csv' % (in_base, i) for i in range(n_shards)]
	out_files = ['%s.%d.csv' % (out_base, i) for i in range(n_shards)]

	# One (feature shard, label file, output shard) task per process.
	args_list = [(ff, f_label, fo) for ff, fo in zip(feature_files, out_files)]

	worker_pool.map(_CombineFeatureLabel, args_list)
Example #2
0
    def test_file_basename(self):
        """file_basename must reject non-string input with TypeError and
        strip both the directory part and the extension from a path."""
        # BUGFIX: the second half of this method was indented with literal
        # tabs while the first half used spaces — a TabError under Python 3
        # and a fragile accident under Python 2. Normalized to spaces.
        self.assertRaises(TypeError, util.file_basename, self.ione)
        self.assertRaises(TypeError, util.file_basename, self.fone)
        self.assertRaises(TypeError, util.file_basename, self.btrue)
        self.assertRaises(TypeError, util.file_basename, self.tsimple)
        self.assertRaises(TypeError, util.file_basename, self.lsimple)

        # Bare filename: only the extension is dropped.
        teststring1 = "file.txt"
        self.assertEqual(util.file_basename(teststring1), "file")

        # Full path: directory components are dropped as well.
        teststring2 = "/home/user/file2.ext"
        self.assertEqual(util.file_basename(teststring2), "file2")
Example #3
0
def TestModelOnData(modelname, fdata, flabel):
	actual_set = com.GetBuySet(flabel)
	rec_set = set()
	
	f_base = util.file_basename(fdata)
	
	re_str = f_base.replace('.',r'\.') + r'\.\d+\.csv$'
	f_list = util.FilterFile(re_str) #['%s.%d.csv' % (f_base, j) for j in range(com.__n_process)]
	

	for f in f_list:
		r , p, y = _ParTestModelOnData((modelname, f))
		
		rec_set |= r
		pred_prob = np.concatenate([pred_prob,p])
		Y_true = np.concatenate([Y_true, y])
		
	
	TP = len(rec_set & actual_set)
	TN = len(rec_set - actual_set)
	FP = len(actual_set - rec_set)
	
	PrintConfuseMatrix(TP, TN, FP)
	P, R, F1 =  GetPRF1(TP, TN, FP)
	PrintPRF1(P, R, F1)
	
	print 'AUC:', roc_auc_score( Y_true.astype(int),  pred_prob)
	
	
	return TP, TN, FP, P, R, F1, pred_prob,Y_true
Example #4
0
def FilterCSV(fn):
	
	ft = '%s.nofilter.csv' % util.file_basename(fn)
	if not os.path.exists(ft):
		os.rename(fn, ft)
	
	fo = fn
	fn = ft
	
	
	
	block_size = 100000
	reader = pandas.read_csv(fn, iterator=True, chunksize=block_size)
	
	mod = 'w'
	header = True
	i = 0
	
	rules = [LastdayRule]
	for data in reader:
		FilterDataWithRule(data, rules).to_csv(fo, mode=mod, header=header,index=False)
		
		mod = 'a'
		header=False
		
		i = i + len(data)
		print 'process %d rows.' % i 
Example #5
0
def FilterCSV(fn):

    ft = '%s.nofilter.csv' % util.file_basename(fn)
    if not os.path.exists(ft):
        os.rename(fn, ft)

    fo = fn
    fn = ft

    block_size = 100000
    reader = pandas.read_csv(fn, iterator=True, chunksize=block_size)

    mod = 'w'
    header = True
    i = 0

    rules = [LastdayRule]
    for data in reader:
        FilterDataWithRule(data, rules).to_csv(fo,
                                               mode=mod,
                                               header=header,
                                               index=False)

        mod = 'a'
        header = False

        i = i + len(data)
        print 'process %d rows.' % i
Example #6
0
def CombineFeatureLabel(f_feature, f_label, f_out):
    """Merge every feature shard with the label file in parallel.

    Shard i of `f_feature` is combined with `f_label` into shard i of
    `f_out` by _CombineFeatureLabel workers.
    """
    n_shards = com.__n_process
    pool = mp.Pool(n_shards)

    in_base = util.file_basename(f_feature)
    out_base = util.file_basename(f_out)

    args_list = []
    for shard in range(n_shards):
        args_list.append(('%s.%d.csv' % (in_base, shard),
                          f_label,
                          '%s.%d.csv' % (out_base, shard)))

    pool.map(_CombineFeatureLabel, args_list)
Example #7
0
def CreateFeature(file, mode):
	"""Generate the feature CSV for `file`'s model in the given `mode`.

	The model module is loaded by basename; its GenFeature is run over the
	raw user log with a mode-specific output name and cutoff date.
	"""
	fid = util.file_basename_id(file)
	GenFeature = util.load_model_from_name(util.file_basename(file)).GenFeature

	# (output-file template, lastday cutoff) per mode; other modes are a no-op,
	# matching the original chain of independent ifs.
	plans = {
		'train': ('feature%d.csv', '2014-12-17'),
		'submit': ('feature_total%d.csv', '2014-12-18'),
		'test': ('feature_test%d.csv', '2014-12-16'),
	}
	if mode in plans:
		out_tpl, cutoff = plans[mode]
		GenFeature('tianchi_mobile_recommend_train_user.csv', out_tpl % fid, lastday=cutoff)
Example #8
0
def CreateFeature(file, mode):
    """Run the model's GenFeature over the raw user log for `mode`.

    Output filename and the `lastday` cutoff depend on the mode
    (train / submit / test); any other mode does nothing.
    """
    src = 'tianchi_mobile_recommend_train_user.csv'
    fid = util.file_basename_id(file)
    model = util.load_model_from_name(util.file_basename(file))
    gen = model.GenFeature

    if mode == 'train':
        gen(src, 'feature%d.csv' % fid, lastday='2014-12-17')
    elif mode == 'submit':
        gen(src, 'feature_total%d.csv' % fid, lastday='2014-12-18')
    elif mode == 'test':
        gen(src, 'feature_test%d.csv' % fid, lastday='2014-12-16')
Example #9
0
		FilterDataWithRule(data, rules).to_csv(fo, mode=mod, header=header,index=False)
		
		mod = 'a'
		header=False
		
		i = i + len(data)
		print 'process %d rows.' % i 
		
if __name__ == '__main__':
	if sys.argv[1]=='train':
		ff = 'feature.merge.csv'
		fl = 'label.csv'
		fd = 'data.csv'
	elif sys.argv[1]=='test':
		ff = 'feature_test.merge.csv'
		fl = 'label_test.csv'
		fd = 'data.test.csv'
	elif sys.argv[1]=='submit':
		ff = 'feature_total.merge.csv'
	else:
		print __doc__
		sys.exit()
	
	pool = mp.Pool(com.__n_process)
	
	fs = util.FilterFile(util.file_basename(ff).replace('.',r'\.') + r'\.\d+\.csv')
	#print fs
	pool.map(FilterCSV, fs)
		
	
	
Example #10
0
                  '--number',
                  default=10000,
                  dest='count',
                  help='sample number')
parser.add_option('-p',
                  '--prob',
                  type='float',
                  dest='p',
                  help='sample probability')
#parser.add_option('-f','--file', dest='fname',help='file name to sample')

(options, args) = parser.parse_args()
#print options,args
#sys.exit()

fname = util.file_basename(args[0])
fd = open('%s.sample.csv' % fname, 'wb')
writer = csv.writer(fd, delimiter=',')
with open('%s.csv' % fname, 'rb') as f:
    reader = csv.reader(f, delimiter=',')
    i = 0

    for row in reader:

        if i == 0 or options.p is None or random.random() < options.p:
            writer.writerow(row)

        i = i + 1
        if options.p is None and i == options.count:
            break
        if i % 100000 == 0:
Example #11
0
def GetFeature(data):
	"""Return log-scaled feature columns from `data`.

	Adds the derived user conversion rate (buys / clicks), drops the id and
	label columns, and applies log(x + 1) smoothing to every feature.
	"""
	# BUGFIX: DataFrame.assign is NOT in-place; the original discarded its
	# result, so user_conver_rate was never actually added. Bind the result
	# and pick the feature columns afterwards so the new column is included.
	data = data.assign(user_conver_rate=lambda x: x.user_buy_count / x.user_click_count)
	fn = [i for i in data.columns if i not in ['user_id', 'item_id', 'buy']]
	# NOTE(review): user_click_count == 0 would produce inf here — confirm
	# upstream guarantees at least one click per row.
	df = data[fn].apply(lambda x: np.log(x + 1), axis=1)
	return df
	
	


	
if __name__ == '__main__':
	X,Y = GetData()
	
	
	
	model_file = '%s.model' % util.file_basename(__file__)
	if not os.path.exists(model_file):
		lr = LogisticRegression(penalty='l1')
		lr.fit(X,Y)
		util.save_obj(lr, model_file)
	else:
		lr = util.load_obj(model_file)
	
	fn = X.columns.values 
	pred = lr.predict_proba(X)[:,1]
	for i in range(len(fn)):
		print fn[i], lr.coef_[0][i]
	print f1_score(Y,pred>.5), roc_auc_score(X,pred)
	
	
	
Example #12
0
                                               index=False)

        mod = 'a'
        header = False

        i = i + len(data)
        print 'process %d rows.' % i


if __name__ == '__main__':
    if sys.argv[1] == 'train':
        ff = 'feature.merge.csv'
        fl = 'label.csv'
        fd = 'data.csv'
    elif sys.argv[1] == 'test':
        ff = 'feature_test.merge.csv'
        fl = 'label_test.csv'
        fd = 'data.test.csv'
    elif sys.argv[1] == 'submit':
        ff = 'feature_total.merge.csv'
    else:
        print __doc__
        sys.exit()

    pool = mp.Pool(com.__n_process)

    fs = util.FilterFile(
        util.file_basename(ff).replace('.', r'\.') + r'\.\d+\.csv')
    #print fs
    pool.map(FilterCSV, fs)
Example #13
0
# coding: utf-8
''' 
take subset of data
usage: subset.py filename 1,2,3,4
'''

import csv, sys, util

if len(sys.argv)!=3:
	print __doc__
else:
	cols = [int(i) for i in sys.argv[2].split(',')]
	f = open(sys.argv[1], 'rb')
	fr = csv.reader(f, delimiter=',')
	
	fd = open('%s.subset_%s.csv' % (util.file_basename(sys.argv[1]), '_'.join([str(i) for i in cols])), 'wb')
	fw = csv.writer(fd, delimiter=',')
	
	nrows = 0
	for row in fr:
		fw.writerow([row[i] for i in cols])
		nrows = nrows + 1
		if nrows%100000==0:
			print 'processed %d rows!' % nrows
	
	f.close()
	fd.close()
	
	

Example #14
0
	print 'usage  python merge_fast.py [f1 ... fn fo]'
	sys.exit()
	
f = []
fr = []

fns = len(sys.argv)-2
for i in range(1,fns+1):
	fd = open(sys.argv[i],'rb')
	f.append(fd)
	reader = csv.reader(fd, delimiter=',')
	fr.append(reader)


fo = sys.argv[-1]
fo_base = util.file_basename(fo)
fo_list = [open('%s.%d.csv' % (fo_base, j),'wb' ) for j in range(com.__n_process)]
fw_list = [csv.writer(fo, delimiter=',') for fo in fo_list]
fidx = 0

header = fr[0].next()
for i in range(1,fns):
	header = header + fr[i].next()[2:]

map(lambda fw: fw.writerow(header), fw_list) # write header

nrows = 0
for row in fr[0]:
	for i in range(1,fns):
		newdata = fr[i].next()
		if (row[0]!=newdata[0] or row[1]!=newdata[1]):
Example #15
0
def main(argv=None):
    """Command-line entry point for the fragmentation tool.

    Parses command-line options (or a configuration file, which overrides
    them), reads the input molecule, runs the fragmentation pipeline and
    writes the fragment output file; can also emit a configuration file.
    """
    if argv is None:
        argv = sys.argv[1:]

    # load defaults so we can use them below
    from config import FragItConfig
    cfg = FragItConfig()

    parser = OptionParser(usage=strings.usage,
                description=strings.description,
                version=strings.version_str)
    parser.add_option("-o", "--output", dest="outputfile", type=str, default="", metavar="filename")

    configuration = OptionGroup(parser, "Configuration")
    general = OptionGroup(parser, "Fragmentation")
    output = OptionGroup(parser, "Output")

    configuration.add_option("--use-config", dest="useconfigfile", type=str, default="", metavar="filename",
                    help="Specify configuration file to use. This will ignore other command line parameters.")
    configuration.add_option("--make-config", dest="makeconfigfile", type=str, default="", metavar="filename",
                    help="Specify a filename to use as a configuration file. Use command line options to modify defaults. It is possible to use this command without specifying an input file to generate a clean configuration file.")

    general.add_option("-m", "--maxfragsize", dest="maxFragmentSize", type=int, default=cfg.getMaximumFragmentSize(),metavar="integer",
                help="The maximum fragment size allowed [default: %default]")
    general.add_option("-g", "--groupcount", dest="groupcount", type=int, default=cfg.getFragmentGroupCount(),metavar="integer",
                help="Specify number of consecutive fragments to combine into a single fragment [default: %default]")
    general.add_option("--disable-protection", dest="disable_protection", action="store_true", default=False,
                help="Specify this flag to disable the use protection patterns.")
    general.add_option("--merge-glycine", action="store_true", dest="merge_glycine", default=False,
                help="Merge a glycine to the neighbor fragment when fragmenting proteins.")
    general.add_option("--merge-specific", dest="mergespecific", type=int, default=None, metavar="integer",
                       help="Merge a specific fragment into all other fragments and remove it as a singular fragment.")
    general.add_option("--charge-model", dest="charge_model", default=cfg.getChargeModel(),
                      help="Charge model to use [%default]")
    general.add_option("--combine-fragments", dest="combinefragments", type=str, default="",metavar="list of integers",
                       help="Combines several fragments into one.")
    output.add_option("--output-format", dest="format", type=str, default=cfg.getWriter(),
                help="Output format [%default]")
    output.add_option("--output-boundaries", dest="boundaries", type=str, default="",metavar="list of floats",
                help="Specifies boundaries for multiple layers. Must be used with --central-fragment option")
    output.add_option("--output-central-fragment", dest="central_fragment", type=int, default=cfg.getCentralFragmentID(), metavar="integer",
                help="Specifies the fragment to use as the central one. Used in combination with --output-boundaries to make layered inputs")
    output.add_option("--output-active-distance", dest="active_atoms_distance", type=float, default=cfg.getActiveAtomsDistance(), metavar="float",
                help="Atoms within this distance from --output-central-fragment will be active. Use with --output-buffer-distance to add buffer region between active and frozen parts. [default: %default]")
    output.add_option("--output-buffer-distance", dest="maximum_buffer_distance", type=float, default=cfg.getBufferDistance(), metavar="float",
                help="Maximum distance in angstrom from active fragments from which to include nearby fragments as buffers. This option adds and extends to --output-boundaries. [default: %default]")
    output.add_option("--output-freeze-backbone", dest="freeze_backbone", action="store_true", default=cfg.getFreezeBackbone(),
                help="Option to freeze the backbone of the active region.")
    output.add_option("--output-jmol-script", dest="output_jmol_script", action="store_true", default=cfg.getWriteJmolScript(),
                help="Write a complimentary jmol script for visualization.")
    output.add_option("--output-pymol-script", dest="output_pymol_script", action="store_true", default=cfg.getWritePymolScript(),
                help="Write a complimentary pymol script for visualization.")

    parser.add_option_group(configuration)
    parser.add_option_group(general)
    parser.add_option_group(output)
    (options, args) = parser.parse_args(argv)

    # Config-only invocation: write a clean configuration file and stop.
    if len(args) == 0 and len(options.makeconfigfile) > 0:
        cfg.writeConfigurationToFile(options.makeconfigfile)
        sys.exit()

    # Exactly one input file is required otherwise.
    if len(args) != 1:
        parser.print_help()
        sys.exit()

    infile = args[0]

    molecule = fileToMol(infile)
    fragmentation = Fragmentation(molecule)


    # if there is a config file, read it and ignore other command line options
    if len(options.useconfigfile) > 0:
        fragmentation.readConfigurationFromFile(options.useconfigfile)
        (writer, output_extension) = get_writer_and_extension(fragmentation.getOutputFormat())
    else:
        fragmentation.setChargeModel(options.charge_model)
        fragmentation.setMaximumFragmentSize(options.maxFragmentSize)
        fragmentation.setOutputFormat(options.format)
        if options.groupcount > 1: fragmentation.setFragmentGroupCount(options.groupcount)

        (writer, output_extension) = get_writer_and_extension(options.format)

    # Default output name: input basename + the writer's extension.
    outfile = "%s%s" % (file_basename(infile), output_extension)
    if len(options.outputfile) > 0:
        outfile = options.outputfile


    # do the fragmentation procedure
    # 'fragmentation.doFragmentSpecificMerging()' should go somewhere here...
    fragmentation.setCombineFragments(options.combinefragments)
    if options.disable_protection:
        fragmentation.clearProtectPatterns()
    if options.merge_glycine:
        fragmentation.enableMergeGlycinePattern()
    fragmentation.beginFragmentation()
    fragmentation.doFragmentation()
    fragmentation.doFragmentMerging()
    fragmentation.doFragmentCombination()
    if fragmentation.getFragmentGroupCount() > 1:
        fragmentation.doFragmentGrouping()
    fragmentation.finishFragmentation()

    # write to file
    out = writer(fragmentation)

    # set options from command line
    boundaries = options.boundaries
    central_fragment = options.central_fragment
    active_atoms_distance = options.active_atoms_distance
    maximum_buffer_distance = options.maximum_buffer_distance
    freeze_backbone = options.freeze_backbone
    output_pymol_script = options.output_pymol_script
    output_jmol_script = options.output_jmol_script

    # set options from config file
    if len(options.useconfigfile) > 0:
        boundaries = fragmentation.getBoundaries()
        central_fragment = fragmentation.getCentralFragmentID()
        output_pymol_script = fragmentation.getWritePymolScript()
        output_jmol_script = fragmentation.getWriteJmolScript()
        freeze_backbone = fragmentation.getFreezeBackbone()
        maximum_buffer_distance = fragmentation.getBufferDistance()
        active_atoms_distance = fragmentation.getActiveAtomsDistance()

    # set the options
    out.setBoundariesFromString(boundaries)
    out.setCentralFragmentID(central_fragment)
    out.setActiveAtomsDistance(active_atoms_distance)
    out.setBufferMaxDistance(maximum_buffer_distance)
    if freeze_backbone: out.setFreezeBackbone()
    if output_pymol_script: out.setPymolOutput(infile,outfile)
    if output_jmol_script: out.setJmolOutput(infile,outfile)

    out.setup()
    out.writeFile(outfile)

    # write configuration file
    if len(options.makeconfigfile) > 0:
        fragmentation.setBoundaries(boundaries)
        fragmentation.writeConfigurationToFile(options.makeconfigfile)
Example #16
0
def main(argv=None):
    """Command-line entry point for the fragmentation tool.

    Parses command-line options (or a configuration file, which overrides
    them), reads the input molecule, runs the fragmentation pipeline and
    writes the fragment output file; can also emit a configuration file.
    """
    if argv is None:
        argv = sys.argv[1:]

    # load defaults so we can use them below
    from config import FragItConfig
    cfg = FragItConfig()

    parser = OptionParser(usage=strings.usage,
                          description=strings.description,
                          version=strings.version_str)
    parser.add_option("-o",
                      "--output",
                      dest="outputfile",
                      type=str,
                      default="",
                      metavar="filename")

    configuration = OptionGroup(parser, "Configuration")
    general = OptionGroup(parser, "Fragmentation")
    output = OptionGroup(parser, "Output")

    configuration.add_option(
        "--use-config",
        dest="useconfigfile",
        type=str,
        default="",
        metavar="filename",
        help=
        "Specify configuration file to use. This will ignore other command line parameters."
    )
    configuration.add_option(
        "--make-config",
        dest="makeconfigfile",
        type=str,
        default="",
        metavar="filename",
        help=
        "Specify a filename to use as a configuration file. Use command line options to modify defaults. It is possible to use this command without specifying an input file to generate a clean configuration file."
    )

    general.add_option(
        "-m",
        "--maxfragsize",
        dest="maxFragmentSize",
        type=int,
        default=cfg.getMaximumFragmentSize(),
        metavar="integer",
        help="The maximum fragment size allowed [default: %default]")
    general.add_option(
        "-g",
        "--groupcount",
        dest="groupcount",
        type=int,
        default=cfg.getFragmentGroupCount(),
        metavar="integer",
        help=
        "Specify number of consecutive fragments to combine into a single fragment [default: %default]"
    )
    general.add_option(
        "--disable-protection",
        dest="disable_protection",
        action="store_true",
        default=False,
        help="Specify this flag to disable the use protection patterns.")
    general.add_option(
        "--merge-glycine",
        action="store_true",
        dest="merge_glycine",
        default=False,
        help=
        "Merge a glycine to the neighbor fragment when fragmenting proteins.")
    general.add_option(
        "--merge-specific",
        dest="mergespecific",
        type=int,
        default=None,
        metavar="integer",
        help=
        "Merge a specific fragment into all other fragments and remove it as a singular fragment."
    )
    general.add_option("--charge-model",
                       dest="charge_model",
                       default=cfg.getChargeModel(),
                       help="Charge model to use [%default]")
    general.add_option("--combine-fragments",
                       dest="combinefragments",
                       type=str,
                       default="",
                       metavar="list of integers",
                       help="Combines several fragments into one.")
    output.add_option("--output-format",
                      dest="format",
                      type=str,
                      default=cfg.getWriter(),
                      help="Output format [%default]")
    output.add_option(
        "--output-boundaries",
        dest="boundaries",
        type=str,
        default="",
        metavar="list of floats",
        help=
        "Specifies boundaries for multiple layers. Must be used with --central-fragment option"
    )
    output.add_option(
        "--output-central-fragment",
        dest="central_fragment",
        type=int,
        default=cfg.getCentralFragmentID(),
        metavar="integer",
        help=
        "Specifies the fragment to use as the central one. Used in combination with --output-boundaries to make layered inputs"
    )
    output.add_option(
        "--output-active-distance",
        dest="active_atoms_distance",
        type=float,
        default=cfg.getActiveAtomsDistance(),
        metavar="float",
        help=
        "Atoms within this distance from --output-central-fragment will be active. Use with --output-buffer-distance to add buffer region between active and frozen parts. [default: %default]"
    )
    output.add_option(
        "--output-buffer-distance",
        dest="maximum_buffer_distance",
        type=float,
        default=cfg.getBufferDistance(),
        metavar="float",
        help=
        "Maximum distance in angstrom from active fragments from which to include nearby fragments as buffers. This option adds and extends to --output-boundaries. [default: %default]"
    )
    output.add_option(
        "--output-freeze-backbone",
        dest="freeze_backbone",
        action="store_true",
        default=cfg.getFreezeBackbone(),
        help="Option to freeze the backbone of the active region.")
    output.add_option(
        "--output-jmol-script",
        dest="output_jmol_script",
        action="store_true",
        default=cfg.getWriteJmolScript(),
        help="Write a complimentary jmol script for visualization.")
    output.add_option(
        "--output-pymol-script",
        dest="output_pymol_script",
        action="store_true",
        default=cfg.getWritePymolScript(),
        help="Write a complimentary pymol script for visualization.")

    parser.add_option_group(configuration)
    parser.add_option_group(general)
    parser.add_option_group(output)
    (options, args) = parser.parse_args(argv)

    # Config-only invocation: write a clean configuration file and stop.
    if len(args) == 0 and len(options.makeconfigfile) > 0:
        cfg.writeConfigurationToFile(options.makeconfigfile)
        sys.exit()

    # Exactly one input file is required otherwise.
    if len(args) != 1:
        parser.print_help()
        sys.exit()

    infile = args[0]

    molecule = fileToMol(infile)
    fragmentation = Fragmentation(molecule)

    # if there is a config file, read it and ignore other command line options
    if len(options.useconfigfile) > 0:
        fragmentation.readConfigurationFromFile(options.useconfigfile)
        (writer, output_extension) = get_writer_and_extension(
            fragmentation.getOutputFormat())
    else:
        fragmentation.setChargeModel(options.charge_model)
        fragmentation.setMaximumFragmentSize(options.maxFragmentSize)
        fragmentation.setOutputFormat(options.format)
        if options.groupcount > 1:
            fragmentation.setFragmentGroupCount(options.groupcount)

        (writer, output_extension) = get_writer_and_extension(options.format)

    # Default output name: input basename + the writer's extension.
    outfile = "%s%s" % (file_basename(infile), output_extension)
    if len(options.outputfile) > 0:
        outfile = options.outputfile

    # do the fragmentation procedure
    # 'fragmentation.doFragmentSpecificMerging()' should go somewhere here...
    fragmentation.setCombineFragments(options.combinefragments)
    if options.disable_protection:
        fragmentation.clearProtectPatterns()
    if options.merge_glycine:
        fragmentation.enableMergeGlycinePattern()
    fragmentation.beginFragmentation()
    fragmentation.doFragmentation()
    fragmentation.doFragmentMerging()
    fragmentation.doFragmentCombination()
    if fragmentation.getFragmentGroupCount() > 1:
        fragmentation.doFragmentGrouping()
    fragmentation.finishFragmentation()

    # write to file
    out = writer(fragmentation)

    # set options from command line
    boundaries = options.boundaries
    central_fragment = options.central_fragment
    active_atoms_distance = options.active_atoms_distance
    maximum_buffer_distance = options.maximum_buffer_distance
    freeze_backbone = options.freeze_backbone
    output_pymol_script = options.output_pymol_script
    output_jmol_script = options.output_jmol_script

    # set options from config file
    if len(options.useconfigfile) > 0:
        boundaries = fragmentation.getBoundaries()
        central_fragment = fragmentation.getCentralFragmentID()
        output_pymol_script = fragmentation.getWritePymolScript()
        output_jmol_script = fragmentation.getWriteJmolScript()
        freeze_backbone = fragmentation.getFreezeBackbone()
        maximum_buffer_distance = fragmentation.getBufferDistance()
        active_atoms_distance = fragmentation.getActiveAtomsDistance()

    # set the options
    out.setBoundariesFromString(boundaries)
    out.setCentralFragmentID(central_fragment)
    out.setActiveAtomsDistance(active_atoms_distance)
    out.setBufferMaxDistance(maximum_buffer_distance)
    if freeze_backbone: out.setFreezeBackbone()
    if output_pymol_script: out.setPymolOutput(infile, outfile)
    if output_jmol_script: out.setJmolOutput(infile, outfile)

    out.setup()
    out.writeFile(outfile)

    # write configuration file
    if len(options.makeconfigfile) > 0:
        fragmentation.setBoundaries(boundaries)
        fragmentation.writeConfigurationToFile(options.makeconfigfile)
Example #17
0
import csv, sys, util, random

from optparse import OptionParser

parser = OptionParser()


parser.add_option('-n','--number', default=10000, dest='count',help='sample number')
parser.add_option('-p','--prob', type='float', dest='p',help='sample probability')
#parser.add_option('-f','--file', dest='fname',help='file name to sample')

(options, args) = parser.parse_args()
#print options,args
#sys.exit()

fname = util.file_basename(args[0])
fd = open('%s.sample.csv' % fname,'wb')
writer = csv.writer(fd, delimiter=',')
with open('%s.csv' % fname, 'rb') as f:
	reader = csv.reader(f, delimiter=',')
	i = 0
	
	for row in reader:
	
		if i==0 or options.p is None or random.random()<options.p:
			writer.writerow(row)
		
			
		
		i = i+1
		if options.p is None and i==options.count: