from extractor import NewPbpExtractor
from nflEvaluation import classifierEvaluation
from nflClassifier import *
from csv import DictReader, DictWriter
from sklearn.model_selection import train_test_split  # sklearn.cross_validation was removed in scikit-learn 0.20

#--------------------------------------------------#
# training
# for each season: use the first 80% of plays for training and the last 20% for testing
data2013 = list(DictReader(open("pbp-2013.csv", 'r')))
data2014 = list(DictReader(open("pbp-2014.csv", 'r')))
data2015 = list(DictReader(open("pbp-2015.csv", 'r')))
dataList = [data2013, data2014, data2015]
dataName = ["2013", "2014", "2015"]

o = DictWriter(open("SVMoutput-sigmoid.csv", 'w'), [
    "dataName", "classifier", "percent", "score", "OmniScore",
    "Type1-A/A/Good", "Type2-A/B/Bad", "Type3-A/B/Good", "Type4-A/A/Bad"
])
o.writeheader()

#---------------------------------#
for dataindex in range(len(dataList)):
    pbp2014 = NewPbpExtractor()
    pbp2014.buildFormationList(dataList[dataindex])
    feature, target = pbp2014.extract4Classifier(dataList[dataindex])

    dataLength = feature.shape[0]
    dataLength80 = int(round(dataLength * 0.8))
    X_train = feature[:dataLength80, :]
    X_test = feature[dataLength80:, :]   # no +1 offset, otherwise one play is dropped
    y_train = target[:dataLength80]
    y_test = target[dataLength80:]
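    # A minimal alternative sketch (not in the original script): the imported
    # train_test_split can produce the same chronological 80/20 split, since
    # shuffle=False keeps plays in game order.
    # X_train, X_test, y_train, y_test = train_test_split(
    #     feature, target, test_size=0.2, shuffle=False)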
def collate_gene_info(group_filename,
                      csv_filename,
                      class_filename,
                      output_filename,
                      ontarget_filename=None,
                      dedup_ORF_prefix=None,
                      no_extra_base=False,
                      is_clustered=False):
    """
    <id>, <pbid>, <length>, <transcript>, <gene>, <category>, <ontarget Y|N|NA>, <ORFgroup NA|NoORF|groupID>, <UMI>, <BC>
    """
    FIELDS = [
        'id', 'pbid', 'length', 'transcript', 'gene', 'category', 'ontarget',
        'ORFgroup', 'UMI', 'UMIrev', 'BC', 'BCrev'
    ]

    group_info = read_group_info(group_filename)
    umi_bc_info = dict(
        (r['id'], r) for r in DictReader(open(csv_filename), delimiter='\t'))
    sqanti_info = dict(
        (r['isoform'], r)
        for r in DictReader(open(class_filename), delimiter='\t'))
    if ontarget_filename is not None:
        ontarget_info = dict(
            (r['read_id'], r)
            for r in DictReader(open(ontarget_filename), delimiter='\t'))

    if dedup_ORF_prefix is not None:
        dedup_ORF_info = {
        }  # seqid --> which group they belong to (ex: PB.1.2 --> ORFgroup_PB.1_1)
        for line in open(dedup_ORF_prefix + '.group.txt'):
            group_id, members = line.strip().split('\t')
            for pbid in members.split(','):
                dedup_ORF_info[pbid] = group_id

    f = open(output_filename, 'w')
    writer = DictWriter(f, FIELDS, delimiter='\t')
    writer.writeheader()

    for ccs_id, pbid in group_info.items():
        if pbid not in sqanti_info:
            print(
                "ignoring ID {0} cuz not in classification file.".format(pbid),
                file=sys.stderr)
            continue

        if is_clustered:
            # id: 1-ATCGAATGT-GCTTCTTTCACCTATCGATGATGGCTCAT-m64015_200531_015713/110297924/ccs
            _index, _umi, _bc, _ccs_id = ccs_id.split('-')
            ccs_id = _ccs_id

        if no_extra_base and (not is_clustered
                              and umi_bc_info[ccs_id]['extra'] != 'NA'):
            print("ignoring ID {0} cuz extra bases.".format(pbid),
                  file=sys.stderr)
            continue
        rec = {'id': ccs_id, 'pbid': pbid}
        rec['length'] = sqanti_info[pbid]['length']
        rec['category'] = sqanti_info[pbid]['structural_category']
        rec['transcript'] = sqanti_info[pbid]['associated_transcript']
        rec['gene'] = sqanti_info[pbid]['associated_gene']

        if is_clustered:
            rec['UMI'] = _umi
            rec['BC'] = _bc
        else:
            rec['UMI'] = umi_bc_info[ccs_id]['UMI']
            rec['BC'] = umi_bc_info[ccs_id]['BC']
        rec['UMIrev'] = Seq(rec['UMI']).reverse_complement()
        rec['BCrev'] = Seq(rec['BC']).reverse_complement()
        if ontarget_filename is None:
            rec['ontarget'] = 'NA'
        else:
            rec['ontarget'] = 'Y' if ontarget_info[pbid]['genes'] != '' else 'N'
        if dedup_ORF_prefix is None:
            rec['ORFgroup'] = 'NA'
        else:
            if pbid not in dedup_ORF_info:
                rec['ORFgroup'] = 'NoORF'
            else:
                rec['ORFgroup'] = dedup_ORF_info[pbid]

        writer.writerow(rec)

    f.close()
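# A usage sketch for collate_gene_info (the filenames are hypothetical
# placeholders, not files produced by this snippet):
# collate_gene_info(group_filename='dedup.group.txt',
#                   csv_filename='fl.umi_bc.csv',
#                   class_filename='sqanti_classification.txt',
#                   output_filename='dedup.annotated.csv',
#                   ontarget_filename=None,
#                   dedup_ORF_prefix=None)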
    def update_status_file(self):
        with open(self.status_file, "w", encoding="utf8") as f:
            w = DictWriter(f, fieldnames=("file", "line"))
            w.writeheader()
            w.writerow({"file": self.last_file, "line": self.last_row})
from csv import DictWriter

with open('D:/TencentAds/TencentLookalike/MetaData/userFeature.csv',
          'w') as fo:
    headers = [
        'uid', 'age', 'gender', 'marriageStatus', 'education',
        'consumptionAbility', 'LBS', 'interest1', 'interest2', 'interest3',
        'interest4', 'interest5', 'kw1', 'kw2', 'kw3', 'topic1', 'topic2',
        'topic3', 'appIdInstall', 'appIdAction', 'ct', 'os', 'carrier', 'house'
    ]
    writer = DictWriter(fo, fieldnames=headers, lineterminator='\n')
    writer.writeheader()

    fi = open('D:/TencentAds/TencentLookalike/MetaData/userFeature.data', 'r')
    for t, line in enumerate(fi, start=1):
        line = line.replace('\n', '').split('|')   # fields are '|'-separated
        userFeature_dict = {}
        for each in line:
            each_list = each.split(' ')            # each field is "<name> <value1> <value2> ..."
            userFeature_dict[each_list[0]] = ' '.join(each_list[1:])
        writer.writerow(userFeature_dict)
        if t % 100000 == 0:
            print(t)
    fi.close()
def save_results(res_file, results_dict, fieldnames):
    print(json.dumps(results_dict))
    with open(res_file, 'a') as f:
        writer = DictWriter(f, fieldnames=fieldnames)
        writer.writerow(results_dict)
Example #6
    def scanForm(self):
        pytesseract.pytesseract.tesseract_cmd = 'c:\\Program Files\\Tesseract-OCR\\tesseract.exe'
        imgQ = cv2.imread(self.queryPath)
        h, w, c = imgQ.shape

        obj = ROI(self.queryPath)

        roi = obj.RI()
        print(roi)
        with open('dataOutput.csv', 'a+', newline='') as f:
            fields = [i[3] for i in roi]
            csvWriter = DictWriter(f, fieldnames=fields)
            csvWriter.writeheader()
        ## ORB -> Oriented FAST and Rotated BRIEF
        orb = cv2.ORB_create(1000)
        ## kp -> keypoints: distinctive points/elements in the image
        ## des1 -> descriptors: numeric representations of the keypoints
        kp1, des1 = orb.detectAndCompute(imgQ, None)
        # imgKp1=cv2.drawKeypoints(imgQ,kp1,None)
        # print(des1)

        path = self.dataPath
        myPicList = os.listdir(path)
        print(myPicList)

        for j, y in enumerate(myPicList):

            img = cv2.imread(path + "/" + y)

            kp2, des2 = orb.detectAndCompute(img, None)
            bf = cv2.BFMatcher(cv2.NORM_HAMMING)
            matches = bf.match(des2, des1)
            matches = sorted(matches, key=lambda x: x.distance)  # sorted() also handles newer OpenCV, where match() returns a tuple
            good = matches[:int(len(matches) * (self.percentage / 100))]

            imgMatch = cv2.drawMatches(img,
                                       kp2,
                                       imgQ,
                                       kp1,
                                       good[:100],
                                       None,
                                       flags=2)

            # #########################################################################
            # item.distance: distance between the two descriptors; lower means a better match.
            # item.queryIdx: index of the descriptor in the query set (here des2, from img).
            # item.trainIdx: index of the descriptor in the train set (here des1, from imgQ).
            # item.imgIdx: index of the train image.
            # #################################################################################

            # print(good[0],kp2[good[0].queryIdx].pt,kp1[good[0].trainIdx].pt)
            srcPoints = np.float32([kp2[m.queryIdx].pt
                                    for m in good]).reshape(-1, 1, 2)
            dstPoints = np.float32([kp1[m.trainIdx].pt
                                    for m in good]).reshape(-1, 1, 2)

            # print(srcPoints)
            M, _ = cv2.findHomography(srcPoints, dstPoints, cv2.RANSAC, 5.0)
            # print(M)

            imgScan = cv2.warpPerspective(img, M, (w, h))
            # imgScan = cv2.resize(imgScan, (w //4, h //4))

            # cv2.imshow(y, imgScan)

            imgShow = imgScan.copy()
            imgMask = np.zeros_like(imgShow)

            myData = []
            print(f'extracting data from form {j}')

            for x, r in enumerate(roi):

                imgCrop = imgScan[r[0][1]:r[1][1], r[0][0]:r[1][0]]

                if r[2] == 'text':
                    s = pytesseract.image_to_string(imgCrop).replace("\n", "")
                    s = s.replace("\x0c", "")
                    print(f'{r[3]}:{s}')
                    myData.append(s)

                if r[2] == 'box':
                    imgGray = cv2.cvtColor(imgCrop, cv2.COLOR_BGR2GRAY)
                    ###inverse ->bright region gives zero and dark region gives one
                    imgThresh = cv2.threshold(imgGray, 170, 255,
                                              cv2.THRESH_BINARY_INV)[1]
                    totalPixels = cv2.countNonZero(imgThresh)
                    # print(r[3],totalPixels)
                    if totalPixels > self.pixelThreshold: totalPixels = 1
                    else: totalPixels = 0
                    print(f'{r[3]}:{totalPixels}')
                    myData.append(totalPixels)

                cv2.putText(imgShow, str(myData[x]), (r[0][0], r[0][1]),
                            cv2.FONT_HERSHEY_PLAIN, 2.5, (0, 0, 255), 4)

            with open('dataOutput.csv', 'a+', newline='') as f:
                fields = [i[3] for i in roi]
                csvWriter = DictWriter(f, fieldnames=fields)
                print('my data is ', myData)
                dic = {}
                for i, d in enumerate(myData):
                    dic[fields[i]] = d
                csvWriter.writerow(dic)

            imgShow = cv2.resize(imgShow, (w // 4, h // 4))
            cv2.imshow('final', imgShow)
            print(myData)

        cv2.waitKey(0)
Example #7
    X_test = test['X']
    y_test = test['Y']
    print(y_train)
    print(list(set(y_train)))
    print(index)

    print("Model with dim=" + str(dim) + ", vecinos=" + str(vecinos) +
          " y C=" + str(c))
    clf = LogisticRegression(C=c, max_iter=1000000)
    clf.fit(preprocessing.scale(X_train), y_train)
    y_pred = clf.predict(preprocessing.scale(X_test))
    print(confusion_matrix(y_test, y_pred))
    print(classification_report(y_test, y_pred))
    print(accuracy_score(y_test, y_pred))

    filename = 'lr_%s_%s.pckl' % (str(dim), str(c))
    elapsed_time = perf_counter() - t0
    print("Time " + str(elapsed_time))

    pickle.dump(clf, open(filename, 'wb'), protocol=4)

    result = {
        "Dimension": dim,
        "Vecinos": vecinos,
        "C": c,
        "Accuracy": accuracy_score(y_test, y_pred)
    }
    with open("lriso.csv", "a+", newline='') as file:
        dict_writer = DictWriter(file, fieldnames=field_names)
        dict_writer.writerow(result)
Example #8
    def export_csv_file(self, product_templates):
        """
        This method is used for export the odoo products in csv file format
        :param self: It contain the current class Instance
        @author: Nilesh Parmar @Emipro Technologies Pvt. Ltd on date 04/11/2019
        """
        buffer = StringIO()

        delimiter = ","
        field_names = [
            "template_name", "product_name", "product_default_code",
            "shopify_product_default_code", "product_description",
            "PRODUCT_TEMPLATE_ID", "PRODUCT_ID", "CATEGORY_ID"
        ]
        csvwriter = DictWriter(buffer, field_names, delimiter=delimiter)
        csvwriter.writeheader()

        rows = []
        for template in product_templates:
            if len(template.attribute_line_ids) > 3:
                continue
            if len(template.product_variant_ids.ids
                   ) == 1 and not template.default_code:
                continue
            for product in template.product_variant_ids.filtered(
                    lambda variant: variant.default_code):
                row = {
                    "PRODUCT_TEMPLATE_ID": template.id,
                    "template_name": template.name,
                    "CATEGORY_ID": template.categ_id.id,
                    "product_default_code": product.default_code,
                    "shopify_product_default_code": product.default_code,
                    "PRODUCT_ID": product.id,
                    "product_name": product.name,
                    "product_description": product.description or None,
                }
                rows.append(row)

        if not rows:
            raise Warning(
                "No data found to be exported.\n\nPossible Reasons:\n   - Number of "
                "attributes are more than 3.\n   - SKU(s) are not set properly."
            )
        csvwriter.writerows(rows)
        buffer.seek(0)
        file_data = buffer.read().encode()
        self.write({
            "datas": base64.encodestring(file_data),
            "file_name": "Shopify_export_product"
        })

        return {
            "type":
            "ir.actions.act_url",
            "url":
            "web/content/?model=shopify.prepare.product.for.export.ept&id=%s&field=datas&field=datas&download=true&filename=%s.csv"
            % (self.id, self.file_name +
               str(datetime.now().strftime("%d/%m/%Y:%H:%M:%S"))),
            "target":
            self
        }
Example #9
        if 'map q' in line:
            print (line)
            words = line.split()
            reg_index_pair = regex.match(words[2])
            register = reg_index_pair.group(1)
            index = int(reg_index_pair.group(2))

            reg_map.append((register,index))
            outcome[register] = 0

# print ("reg_map = ")
# print (reg_map)

with open(sys.argv[2], 'a+') as csvfile: # open for appending

    writer = DictWriter(csvfile, fieldnames=list(outcome.keys()))
    csvfile.seek(0)          # 'a+' opens positioned at end-of-file; rewind before checking
    if not csvfile.read():   # write the header only if the file is empty
        writer.writeheader()

    # get the printouts from all the trials
    filenames = glob.glob(os.path.splitext(sys.argv[1])[0]+'.trial_*.out')
    print ("filenames = ")
    print (filenames)
    for filename in filenames:
        with open(filename) as file:

            # Parse QX Simulator output format
            state_lines = False # whether these lines are state amplitude lines
            first_basis = True # whether this is the first state such that we need to find the global phase
            phase_global = 0.0
            for line in file:
from csv import DictReader, DictWriter
from collections import defaultdict
from operator import itemgetter

MIN_SCORE = 5

with open('points.csv') as f:
    reader = DictReader(f)
    catpoints = {r['category']: int(r['points']) for r in reader}

points = defaultdict(int)
with open('contributions.csv') as f:
    reader = DictReader(f)
    for r in reader:
        points[r['company']] += catpoints[r['category']]

ranking = sorted(points.items(), key=itemgetter(1), reverse=True)

with open('ranking.csv', 'w') as f:
    writer = DictWriter(f, ['company', 'score'])
    writer.writeheader()
    writer.writerows({
        'company': c,
        'score': s
    } for (c, s) in ranking if s >= MIN_SCORE)
def summarize_junctions(sample_dirs, sample_names, gff_filename, output_prefix, genome_d=None, junction_known=None):
    """
    1. for each sample, read all the GFF, store the junction information (both 0-based)

    """
    junc_by_chr_strand = defaultdict(lambda: defaultdict(lambda: [])) # (chr,strand) --> (donor,acceptor) --> samples it show up in (more than once possible)

    for sample_name, d in sample_dirs.items():
        for r in GFF.collapseGFFReader(os.path.join(d, gff_filename)):
            n = len(r.ref_exons)
            if n == 1: continue # ignore single exon transcripts
            for i in range(n-1):
                donor = r.ref_exons[i].end-1 # make it 0-based
                accep = r.ref_exons[i+1].start # start is already 0-based
                junc_by_chr_strand[r.chr, r.strand][donor, accep].append(sample_name)

    # write junction report
    f1 = open(output_prefix+'.junction.bed', 'w')
    f1.write("track name=junctions description=\"{0}\" useScore=1\n".format(output_prefix))

    JUNC_DETAIL_FIELDS = ['chr', 'left', 'right', 'strand', 'num_transcript', 'num_sample', 'genome', 'annotation', 'label']


    with open(output_prefix+'.junction_detail.txt', 'w') as f:
        writer = DictWriter(f, JUNC_DETAIL_FIELDS, delimiter='\t')
        writer.writeheader()
        keys = list(junc_by_chr_strand.keys())
        keys.sort()
        for _chr, _strand in keys:
            v = junc_by_chr_strand[_chr, _strand]
            v_keys = list(v.keys())
            v_keys.sort()
            labels = cluster_junctions(v_keys)
            for i,(_donor, _accep) in enumerate(v_keys):
                rec = {'chr': _chr,
                       'left': _donor,
                       'right': _accep,
                       'strand': _strand,
                       'num_transcript': len(v[_donor,_accep]),
                       'num_sample': len(set(v[_donor,_accep]))}
                #f.write("{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t".format(_chr, _donor, _accep, _strand, len(v[_donor,_accep]), len(set(v[_donor,_accep]))))
                f1.write("{0}\t{1}\t{2}\t{3}\t{4}\t{5}\n".format(_chr, _donor, _accep+1, output_prefix, len(v[_donor,_accep]), _strand))
                # if genome is given, write acceptor-donor site
                if genome_d is None or _chr not in genome_d:
                    rec['genome'] = 'NA'
                    #f.write("NA\t")
                else:
                    up, down = genome_d[_chr][_donor+1:_donor+3], genome_d[_chr][_accep-2:_accep]
                    if _strand == '+':
                        rec['genome'] = "{0}-{1}".format(str(up.seq).upper(), str(down.seq).upper())
                        #f.write("{0}-{1}\t".format(str(up.seq).upper(), str(down.seq).upper()))
                    else:
                        rec['genome'] = "{0}-{1}".format(str(down.reverse_complement().seq).upper(), str(up.reverse_complement().seq).upper())
                        #f.write("{0}-{1}\t".format(str(down.reverse_complement().seq).upper(), str(up.reverse_complement().seq).upper()))
                # if annotation is given, check if matches with annotation
                if junction_known is None:
                    rec['annotation'] = 'NA'
                    #f.write("NA\n")
                else:
                    if (_chr, _strand) in junction_known and (_donor, _accep) in junction_known[_chr, _strand]:
                        rec['annotation'] = 'Y'
                        #f.write("Y\t")
                    else:
                        rec['annotation'] = 'N'
                        #f.write("N\t")
                rec['label'] = "{c}_{s}_{lab}".format(c=_chr, s=_strand, lab=labels[i])
                writer.writerow(rec)
                #f.write("{c}_{s}_{lab}\n".format(c=_chr, s=_strand, lab=labels[i]))
    f1.close()

    return junc_by_chr_strand
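# A usage sketch for summarize_junctions (sample names, directories, and the
# GFF filename are hypothetical placeholders):
# junc_info = summarize_junctions(
#     sample_dirs={'sampleA': 'sampleA/collapsed', 'sampleB': 'sampleB/collapsed'},
#     sample_names=['sampleA', 'sampleB'],
#     gff_filename='touse.gff',
#     output_prefix='all_samples',
#     genome_d=None, junction_known=None)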
            escritor_csv.writerow([filme, genero, duracao]) # Puts each value under its corresponding header column.
        else:
            print('Encerrado.')

# NOTE: every time this write runs, the header is written again, so it ends up repeated in the file (when using 'a' mode).

"""

# DictWriter

from csv import DictWriter

with open('filmes2.csv', 'a+', encoding='UTF-8') as arq:
    cabecalho = ['Título', 'Gênero', 'Duração']

    escritor_csv = DictWriter(arq, fieldnames=cabecalho)  # Create the writer with the header fields.
    escritor_csv.writeheader()  # Write the header row.
    filme = None

    while filme != 'sair':
        filme = str(input('Título: '))
        if filme != 'sair':
            genero = str(input('Gênero: '))
            duracao = int(input('Duração em minutos: '))

            escritor_csv.writerow({
                'Título': filme,
                'Gênero': genero,
                'Duração': duracao
            })  # The dict keys must exactly match the fieldnames declared above.
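# A minimal sketch of how to avoid the repeated header mentioned in the note
# above: when appending, write the header only while the file is still empty
# (assumes the same 'filmes2.csv' layout; os is imported just for the check).
import os

escrever_cabecalho = (not os.path.exists('filmes2.csv')
                      or os.path.getsize('filmes2.csv') == 0)
with open('filmes2.csv', 'a+', encoding='UTF-8') as arq:
    escritor_csv = DictWriter(arq, fieldnames=['Título', 'Gênero', 'Duração'])
    if escrever_cabecalho:
        escritor_csv.writeheader()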
import numpy as np
import argparse
import pprint
from csv import DictReader, DictWriter

data = list(DictReader(open("pbp-2015.csv", 'r')))

with open("pbp-2015New.csv", "w") as csvFile:
    fieldnames = [
        "GameId", "GameDate", "Quarter", "Minute", "Second", "OffenseTeam",
        "DefenseTeam", "Down", "ToGo", "YardLine", "SeriesFirstDown",
        "Description", "SeasonYear", "Yards", "Formation", "PlayType",
        "IsRush", "IsPass", "IsIncomplete", "IsTouchdown", "PassType",
        "IsSack", "IsChallenge", "IsChallengeReversed", "IsInterception",
        "IsFumble", "IsPenalty", "IsTwoPointConversion",
        "IsTwoPointConversionSuccessful", "RushDirection", "YardLineFixed",
        "YardLineDirection", "IsPenaltyAccepted", "PenaltyTeam", "IsNoPlay",
        "PenaltyType", "PenaltyYards", "HomeTeam", "VisitingTeam",
        "HomeTeamFinalScore", "VisitingTeamFinalScore"
    ]
    writer = DictWriter(csvFile, fieldnames=fieldnames, extrasaction='ignore')
    writer.writeheader()

    for play in data:
        del play["NextScore"]
        del play["IsMeasurement"]
        del play["Challenger"]
        del play["TeamWin"]

        writer.writerow(play)

import datetime as dt
import logging
import re
from csv import DictReader, DictWriter

now = dt.datetime.utcnow()
logging.basicConfig(filename='ATSDTA_ATSP600_clean' + str(now) + '.log', level=logging.WARNING)

pnum = re.compile(r'^P\d{3}$') 
tickmarks = re.compile(r"^`+$")

CSVFILE2013 = '../data/JeffersonCo/JeffcoData Nov 2013/ATSDTA_ATSP600.txt'
CSVFILECLEAN2013 = '../data/JeffersonCo/JeffcoData Nov 2013/CLEANED_ATSDTA_ATSP600.csv'
#CSVFILECLEAN2013 =  '/Users/vmd/GitHub/Jeffco-Properties/Data/JeffersonCo/Datasets/test1.csv'

with open(CSVFILE2013, 'r', encoding='utf-8', errors='ignore', newline='') as csvread:
    reader = DictReader(csvread)
    with open(CSVFILECLEAN2013, 'w') as csvwrite:
        writer = DictWriter(csvwrite, delimiter=',', fieldnames=reader.fieldnames)
        writer.writeheader()
        for line in reader:
            for k,i in line.items():
                line[k] = i.strip(" ")
            if tickmarks.match(line['PRPZIP4']):
                line['PRPZIP4'] = ''
                logging.debug(' Cleaned "^`+$" match from line ' + line['SCH'] + ' PRPZIP4 ' + str(line['PRPZIP4']),) 
            if pnum.match(line['TTD']):
                line['TTD'] = line['TTD'].strip('P')
                logging.debug(' Cleaned "^P\d{3}$" match from line ' + line['SCH'] + ' TTD ' + str(line['TTD']),)
            if pnum.match(line['ATD']):
                line['ATD'] = line['ATD'].strip('P')
                logging.debug(' Cleaned "^P\d{3}$" match from line ' + line['SCH'] + ' ATD ' + str(line['ATD']),)
            if line['ALTATD'] == 'N99':
                line['ALTATD'] = ''
def collect_cluster_results(group_csv,
                            out_dir,
                            output_prefix,
                            use_BC=False,
                            indices_to_use=None):
    """
    <index>,<UMI>,<locus>,<transcript or NA>,<ccs_id>
    # HQ/LQ transcript IDs go from
    transcript/0 --> 1-TCAAGGTC-transcript/0
    # however singletons (shouldn't have much post cluster) will keep the CCS ID which is ok
    """
    f_out_rep = open(output_prefix + '.clustered_transcript.rep.fasta', 'w')
    f_out_not = open(output_prefix + '.clustered_transcript.not_rep.fasta',
                     'w')
    f_csv = open(output_prefix + '.clustered_transcript.csv', 'w')
    writer = DictWriter(
        f_csv,
        fieldnames=['index', 'UMI', 'BC', 'locus', 'cluster', 'ccs_id'],
        delimiter=',')
    #writer.writeheader()

    bad = []
    for r in DictReader(open(group_csv), delimiter=','):
        index = r['index']
        if indices_to_use is not None and index not in indices_to_use:
            continue
        umi_key = r['UMI']
        if use_BC:
            umi_key += '-' + r['BC']
        members = list(set(r['members'].split(',')))

        if len(members) == 1:  # singleton, no directory, continue
            continue
        d = os.path.join(out_dir, index, umi_key)
        hq = os.path.join(d, 'output.hq.fasta.gz')
        lq = os.path.join(d, 'output.lq.fasta.gz')
        single = os.path.join(d, 'output.singletons.fasta.gz')
        report = os.path.join(d, 'output.cluster_report.csv')

        if not os.path.exists(d):
            print("{0} does not exist! DEBUG mode, ok for now".format(d))
            bad.append(d)
            continue

        if not os.path.exists(report):
            print("{0} does not exist! DEBUG mode, ok for now".format(report))
            bad.append(report)
            continue

        hq_count = 0
        if os.path.exists(hq):
            with gzip.open(hq, 'rt') as handle:
                for seqrec in SeqIO.parse(handle, 'fasta'):
                    if hq_count == 0:
                        f_out_rep.write(">{i}-{u}-{n}\n{s}\n".format(
                            i=index, u=umi_key, n=seqrec.id, s=seqrec.seq))
                    else:
                        f_out_not.write(">{i}-{u}-{n}\n{s}\n".format(
                            i=index, u=umi_key, n=seqrec.id, s=seqrec.seq))
                    hq_count += 1

        if os.path.exists(lq):
            with gzip.open(lq, 'rt') as handle:
                for seqrec in SeqIO.parse(handle, 'fasta'):
                    f_out_not.write(">{i}-{u}-{n}\n{s}\n".format(i=index,
                                                                 u=umi_key,
                                                                 n=seqrec.id,
                                                                 s=seqrec.seq))

        info = {
            'index': index,
            'UMI': r['UMI'],
            'BC': r['BC'] if use_BC else 'NA',
            'locus': r['locus']
        }
        for ccs_rec in DictReader(open(report), delimiter=','):
            # cluster_id,read_id,read_type
            # transcript/0,m64012_191109_035807/102828567/ccs,FL
            # transcript/0,m64012_191109_035807/121700628/ccs,FL
            info['cluster'] = ccs_rec['cluster_id']
            info['ccs_id'] = ccs_rec['read_id']
            writer.writerow(info)

        # NOTE: singletons are not part of output.cluster_report.csv
        info['cluster'] = 'NA'
        if os.path.exists(single):
            with gzip.open(single, 'rt') as handle:
                for seqrec in SeqIO.parse(handle, 'fasta'):
                    f_out_not.write(">{i}-{u}-{n}\n{s}\n".format(i=index,
                                                                 u=umi_key,
                                                                 n=seqrec.id,
                                                                 s=seqrec.seq))
                    info['ccs_id'] = seqrec.id
                    writer.writerow(info)

    f_out_not.close()
    f_out_rep.close()
    f_csv.close()
    return bad, f_out_rep.name, f_csv.name
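# A usage sketch for collect_cluster_results (the group CSV, cluster output
# directory, and prefix are hypothetical placeholders):
# bad_dirs, rep_fasta, csv_file = collect_cluster_results(
#     group_csv='dedup.group.csv',
#     out_dir='cluster_out',
#     output_prefix='clustered',
#     use_BC=True)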
def append_row(filename, dict_of_elem):
    field_names = dict_of_elem.keys()
    with open(filename, 'a+', newline='') as write_obj:
        dict_writer = DictWriter(write_obj, fieldnames=field_names)
        dict_writer.writerow(dict_of_elem)
Example #17
def create_csv(csv_path, headers):
    """Creates the CSV file with the headers (must be a list)"""
    with open(csv_path, 'w') as csv_file:
        writer = DictWriter(csv_file, fieldnames=headers)
        writer.writeheader()
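# A usage example for create_csv (the path and header names are illustrative):
create_csv('results.csv', ['id', 'score', 'runtime_s'])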
def _save_to_csv(quotes):
    with open('quotes.csv', 'w') as file:
        DW_object = DictWriter(file, fieldnames=quotes[0].keys())
        DW_object.writeheader()
        DW_object.writerows(quotes)
#     catsfile = file.read()
#     print(catsfile)
print(" -- reader() from ** writed cats.csv-- ")
from csv import reader
with open("cats.csv") as file:
    csv_reader = reader(file)

    # print(list(csv_reader)) #option 1

    for row in csv_reader:  #option 2
        print(row)

print("-----------DicWriter--------------------")
# Version using DictWriter
with open("cats2.csv", "w") as file:
    headers = ["Name", "Breed", "Age"]
    csv_writer = DictWriter(file, fieldnames=headers)
    csv_writer.writeheader()
    csv_writer.writerow({
        "Name": "Garfield",
        "Breed": "Orange Tabby",
        "Age": 10
    })

with open("cats2.csv", "r") as file:
    lines = file.readlines()
    print(lines)
print("-------------------")
with open("cats2.csv", "r") as file:
    result = file.read()
    print(result)
Example #20
def gaVRPMS(instName,
            unitCost,
            initCost,
            waitCost,
            delayCost,
            lightUnitCost,
            lightInitCost,
            lightWaitCost,
            lightDelayCost,
            indSize,
            popSize,
            cxPb,
            mutPb,
            NGen,
            exportCSV=False,
            customizeData=False):
    if customizeData:
        jsonDataDir = os.path.join(BASE_DIR, 'data', 'json_customize')
    else:
        jsonDataDir = os.path.join(BASE_DIR, 'data', 'json')
    jsonFile = os.path.join(jsonDataDir, '%s.json' % instName)
    with open(jsonFile) as f:
        instance = load(f)
    creator.create('FitnessMax', base.Fitness, weights=(1.0, ))
    creator.create('Individual', list, fitness=creator.FitnessMax)
    toolbox = base.Toolbox()
    # Attribute generator
    toolbox.register('indexes', random.sample, range(1, indSize + 1), indSize)
    # Structure initializers
    toolbox.register('individual', tools.initIterate, creator.Individual,
                     toolbox.indexes)
    toolbox.register('population', tools.initRepeat, list, toolbox.individual)
    # Operator registering
    toolbox.register('evaluate',
                     evalVRPMS,
                     instance=instance,
                     unitCost=unitCost,
                     initCost=initCost,
                     waitCost=waitCost,
                     delayCost=delayCost,
                     lightUnitCost=lightUnitCost,
                     lightInitCost=lightInitCost,
                     lightWaitCost=lightWaitCost,
                     lightDelayCost=lightDelayCost)
    toolbox.register('select', tools.selRoulette)
    toolbox.register('mate', cxPartialyMatched)
    toolbox.register('mutate', mutInverseIndexes)
    pop = toolbox.population(n=popSize)
    # Results holders for exporting results to CSV file
    csvData = []
    print('Start of evolution')
    # Evaluate the entire population
    fitnesses = list(map(toolbox.evaluate, pop))
    for ind, fit in zip(pop, fitnesses):
        ind.fitness.values = fit
    print('  Evaluated %d individuals' % len(pop))
    # Begin the evolution
    for g in range(NGen):
        print('-- Generation %d --' % g)
        # Select the next generation individuals
        offspring = toolbox.select(pop, len(pop))
        # Clone the selected individuals
        offspring = list(map(toolbox.clone, offspring))
        # Apply crossover and mutation on the offspring
        for child1, child2 in zip(offspring[::2], offspring[1::2]):
            if random.random() < cxPb:
                toolbox.mate(child1, child2)
                del child1.fitness.values
                del child2.fitness.values
        for mutant in offspring:
            if random.random() < mutPb:
                toolbox.mutate(mutant)
                del mutant.fitness.values
        # Evaluate the individuals with an invalid fitness
        invalidInd = [ind for ind in offspring if not ind.fitness.valid]
        fitnesses = map(toolbox.evaluate, invalidInd)
        for ind, fit in zip(invalidInd, fitnesses):
            ind.fitness.values = fit
        print('  Evaluated %d individuals' % len(invalidInd))
        # The population is entirely replaced by the offspring
        pop[:] = offspring
        # Record statistics into the logbook
        stats = tools.Statistics(key=lambda ind: ind.fitness.values)
        stats.register('avg', numpy.mean)
        stats.register('std', numpy.std)
        stats.register('min', numpy.min)
        stats.register('max', numpy.max)
        record = stats.compile(pop)
        print(record)
        # Gather all the fitnesses in one list and print the stats
        fits = [ind.fitness.values[0] for ind in pop]
        length = len(pop)
        mean = sum(fits) / length
        sum2 = sum(x * x for x in fits)
        std = abs(sum2 / length - mean**2)**0.5
        print('  Min %s' % min(fits))
        print('  Max %s' % max(fits))
        print('  Avg %s' % mean)
        print('  Std %s' % std)
        # Write data to holders for exporting results to CSV file
        if exportCSV:
            csvRow = {
                'generation': g,
                'evaluated_individuals': len(invalidInd),
                'min_fitness': min(fits),
                'max_fitness': max(fits),
                'avg_fitness': mean,
                'std_fitness': std,
                'avg_cost': 1 / mean,
            }
            csvData.append(csvRow)
    print('-- End of (successful) evolution --')
    bestInd = tools.selBest(pop, 1)[0]
    print('Best individual: %s' % bestInd)
    print('Fitness: %s' % bestInd.fitness.values[0])
    printRoute(ind2route(bestInd, instance))
    print('Total cost: %s' % (1 / bestInd.fitness.values[0]))
    if exportCSV:
        csvFilename = '%s_uC%s_iC%s_wC%s_dC%s_iS%s_pS%s_cP%s_mP%s_nG%s.csv' % (
            instName, unitCost, initCost, waitCost, delayCost, indSize,
            popSize, cxPb, mutPb, NGen)
        csvPathname = os.path.join(BASE_DIR, 'results', csvFilename)
        print('Write to file: %s' % csvPathname)
        makeDirsForFile(pathname=csvPathname)
        if not exist(pathname=csvPathname, overwrite=True):
            with open(csvPathname, 'w') as f:
                fieldnames = [
                    'generation', 'evaluated_individuals', 'min_fitness',
                    'max_fitness', 'avg_fitness', 'std_fitness', 'avg_cost'
                ]
                writer = DictWriter(f, fieldnames=fieldnames, dialect='excel')
                writer.writeheader()
                for csvRow in csvData:
                    writer.writerow(csvRow)
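# A usage sketch for gaVRPMS (the instance name and cost/GA parameters are
# illustrative values, not taken from the original run):
# gaVRPMS(instName='R101', unitCost=8.0, initCost=100.0, waitCost=1.0,
#         delayCost=1.5, lightUnitCost=4.0, lightInitCost=50.0,
#         lightWaitCost=0.5, lightDelayCost=0.75, indSize=25, popSize=80,
#         cxPb=0.85, mutPb=0.02, NGen=200, exportCSV=True, customizeData=False)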
Example #21
f = ROOT.TFile(fname)
tree = f.DecayTree

attrs = {
    'mass': lambda tree: tree.D0_MM,
    'decaytime': lambda tree: tree.D0_TAU * 1000.,
    'pt': lambda tree: tree.D0_PT,
    'ipchi2': lambda tree: tree.D0_MINIPCHI2
}

ranges = {
    'mass': (1815, 1915),
    'decaytime': (0.15, 6.),
}

with open('D0KpiData.csv', 'w') as fout:
    writer = DictWriter(fout, attrs.keys())
    writer.writeheader()
    for i in range(tree.GetEntries()):
        tree.GetEntry(i)
        vals = {attr: func(tree) for attr, func in attrs.items()}
        inrange = True
        for attr, (vmin, vmax) in ranges.items():
            if vals[attr] < vmin or vals[attr] > vmax:
                inrange = False
                break
        if not inrange:
            continue
        writer.writerow(vals)
Example #22
    def openVideoWriter(self,
                        filename,
                        encoder=None,
                        overwrite=False,
                        quality=75,
                        bitrate=1000000,
                        img_size=None,
                        csv_timestamps=True,
                        embed_image_info=['timestamp']):
        """
        Opens a video writer. Subsequent calls to .get_image() will
        additionally write those frames out to the file.

        Parameters
        ----------
        filename : str
            Path to desired output file. If extension is omitted it will be
            inferred from <file_format> (if specified).
        encoder : str { 'AVI' | 'MJPG' | 'H264' } or None, optional
            Output encoder to use. If None, will automatically set to 'AVI'
            if filename ends with an '.avi' extension, 'H264' if filename
            ends with a 'mp4' extension, or will raise an error for other
            extensions. Note that 'MJPG' and 'H264' formats permit additional
            arguments to be passed. The default is None.
        overwrite : bool, optional
            If False and the output file already exists, an error will be
            raised. The default is False.
        quality : int, optional
            Value between 0-100 determining output quality. Only applicable
            for MJPG format. The default is 75.
        bitrate : int, optional
            Bitrate to encode at. Only applicable for H264 format. The default
            is 1000000.
        img_size : (W,H) tuple of ints, optional
            Image resolution. Only applicable for H264 format. If not given,
            will attempt to determine from camera's video mode, but this
            might not work. The default is None.
        csv_timestamps : bool, optional
            If True, timestamps for each frame will be saved to a csv file
            corresponding to the output video file. The default is True.
            Note that to get 1394 cycle timestamps (more accurate), the
            timestamp property MUST be enabled within the embedded image info.
        embed_image_info : list or 'all' or None, optional
            List of properties to embed within top-left image pixels. Note that
            video MUST be monochrome for pixel values to be usable. Available
            properties are: [timestamp, gain, shutter, brightness, exposure,
            whiteBalance, frameCounter, strobePattern, ROIPosition].
            Alternatively specify 'all' to use all available properties.
            Specify None to not embed any properties. The timestamp property
            MUST be enabled to get 1394 cycle timestamps in the CSV file
            (if applicable), regardless of whether the embedded information
            itself is going to be used. The default is to embed timestamps.
        """

        # Try to auto-determine file format if unspecified
        if encoder is None:
            ext = os.path.splitext(filename)[1].lower()  # case insensitive
            if ext == '.avi':
                encoder = 'AVI'
            elif ext == '.mp4':
                encoder = 'H264'
            elif not ext:
                raise ValueError('Cannot determine file_format automatically '
                                 'without file extension')
            else:
                raise ValueError('Cannot determine file_format automatically '
                                 f'from {ext} extension')
            print(f'Recording using {encoder} encoder')

        encoder = encoder.upper()  # ensure case insensitive

        if encoder not in ['AVI', 'MJPG', 'H264']:
            raise ValueError("Encoder must be one of 'AVI', 'MJPG', or 'H264', "
                             f"but received {encoder}")

        # Auto-determine file extension if necessary
        if not os.path.splitext(filename)[1]:
            if encoder in ['AVI', 'MJPG']:
                filename += '.avi'
            elif encoder == 'H264':
                filename += '.mp4'

        # Without overwrite, error if file exists. AVI writer sometimes
        # appends a bunch of zeros to name, so check that too.
        if not overwrite:
            _filename, ext = os.path.splitext(filename)
            alt_filename = _filename + '-0000' + ext
            if os.path.isfile(filename) or os.path.isfile(alt_filename):
                raise OSError(f'Output file {filename} already exists')

        # Update camera to embed image info
        available_info = self.cam.getEmbeddedImageInfo().available
        prop_keys = [k for k in dir(available_info) if not k.startswith('__')]
        props = dict((k, False) for k in prop_keys)

        if embed_image_info:
            if not isinstance(embed_image_info, (list, tuple)):
                embed_image_info = [embed_image_info]

            if 'all' in embed_image_info:
                for k in prop_keys:
                    props[k] = getattr(available_info, k)
            else:  # use specified values
                for k in embed_image_info:
                    if k not in prop_keys:
                        raise KeyError(
                            "Embedded property must be one of "
                            f"list({prop_keys}), but received '{k}'")
                    elif not getattr(available_info, k):
                        raise ValueError(
                            f"'{k}' embedded property not available")
                    props[k] = True

        self.cam.setEmbeddedImageInfo(**props)

        # Open csv writer for timestamps?
        if csv_timestamps:
            csv_filename = os.path.splitext(filename)[0] + '.csv'
            if not overwrite and os.path.isfile(csv_filename):
                raise OSError(f'Timestamps file {csv_filename} already exists')
            self.csv_fd = open(csv_filename, 'w')

            fieldnames = [
                'seconds', 'microSeconds', 'cycleSeconds', 'cycleCount',
                'cycleOffset'
            ]
            self.csv_writer = DictWriter(self.csv_fd,
                                         fieldnames,
                                         delimiter=',',
                                         lineterminator='\n')
            self.csv_writer.writeheader()

        # Initialise video writer, allocate to class
        self.video_writer = PyCapture2.FlyCapture2Video()

        # Open video file
        bytes_filename = filename.encode('utf-8')  # needs to be bytes string
        if encoder == 'AVI':
            self.video_writer.AVIOpen(bytes_filename, self.fps)
        elif encoder == 'MJPG':
            self.video_writer.MJPGOpen(bytes_filename, self.fps, quality)
        elif encoder == 'H264':
            if img_size is None:
                if self.img_size is None:
                    raise RuntimeError('Cannot determine image resolution')
                else:
                    img_size = self.img_size
            W, H = img_size
            self.video_writer.H264Open(bytes_filename, self.fps, W, H, bitrate)

        # Success!
        self._video_writer_isOpen = True
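# A usage sketch (the Camera wrapper name is hypothetical; the method and
# parameters follow the docstring above):
# cam = Camera()
# cam.openVideoWriter('session01.mp4', encoder='H264', bitrate=2000000,
#                     img_size=(1280, 960), csv_timestamps=True,
#                     embed_image_info=['timestamp'])
# for _ in range(100):
#     cam.get_image()   # each call also writes a frame to the video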
def prep_results_file(res_file, fieldnames):
    with open(res_file, 'w') as f:
        writer = DictWriter(f, fieldnames=fieldnames)
        writer.writeheader()
Example #24
    # Train classifier and do cross validation
    lr = SGDClassifier(loss='log', penalty='l2', shuffle=True)
    if args.cvt:
        print("Start cross validation...")
        cv = ShuffleSplit(len(y_train),
                          args.cvt,
                          test_size=args.cvp,
                          random_state=int(time.time()))
        scores = cross_val_score(lr,
                                 x_train,
                                 y_train,
                                 cv=cv,
                                 scoring='accuracy',
                                 verbose=1)
        print(scores)
        print(scores.mean(), scores.std())

    # Run test data
    print("Run test set...")
    x_test = feat.test_feature(test, meta_info)
    lr.fit(x_train, y_train)
    feat.show_top(lr, labels, args.top)
    predictions = lr.predict(x_test)
    o = DictWriter(open(args.output, 'w'), ['id', 'spoiler'])
    o.writeheader()
    for ii, pp in zip([x['id'] for x in test], predictions):
        d = {'id': ii, 'spoiler': labels[pp]}
        o.writerow(d)
    print('Finish!')
Example #25
def export_bids(raweeg=None,
                bids_paths=None,
                participants=None,
                overwrite=False,
                verbose=False):
    '''Export recordings in BIDS format.

    Args:
        raweeg : array
            EEG streams previously imported as MNE RawArray instances.
        bids_paths : array
            BIDS paths as MNE-BIDS BIDSPath instances.
        participants : array
            The participants in recordings. Template available in README.
        overwrite : bool
            Overwrite existing BIDS recordings.
        verbose : bool
            Show process while exporting.
    Raises:
        ValueError: if no stream is specified in raweeg, or raweeg and bids_paths do not have the same length.
    See also:
        create_bids_path
    '''
    if raweeg is None or bids_paths is None:
        raise ValueError(
            'You must enter EEG recording and BIDS path array parameters.')

    raweeg = [raweeg] if not isinstance(raweeg, list) else raweeg
    bids_paths = [bids_paths
                  ] if not isinstance(bids_paths, list) else bids_paths
    participants = [participants
                    ] if not isinstance(participants, list) else participants

    if len(raweeg) != len(bids_paths):
        raise ValueError('BIDS path and eeg arrays must have the same length.')

    # Create BIDS
    for index, recording in enumerate(raweeg):
        if system() == 'Windows':
            temporal_file_path = gettempdir() + '\\dummy_raw.fif'
        else:
            temporal_file_path = gettempdir() + '/dummy_raw.fif'

        if path.exists(temporal_file_path):
            remove(temporal_file_path)

        # Temporal file storing because of package requirements
        recording.save(temporal_file_path)
        file_rec = io.Raw(fname=temporal_file_path)

        # Create BIDS structure with EEG data
        write_raw_bids(raw=file_rec,
                       bids_path=bids_paths[index],
                       events_data=None,
                       event_id=None,
                       anonymize=None,
                       overwrite=overwrite,
                       verbose=verbose)

        remove(temporal_file_path)
        print('Exported recording: ', recording.annotations.description)

    # Fill participants info file
    if participants and participants[0] is not None:  # skip when no participant info was given
        for subject in participants:
            root_folder = r'%s' % subject['root']
            if system() == 'Windows':
                if root_folder[-1] == '\\':
                    root_folder = root_folder + 'participants.tsv'
                else:
                    root_folder = root_folder + '\\participants.tsv'
            else:
                if root_folder[-1] == '/':
                    root_folder = root_folder + 'participants.tsv'
                else:
                    root_folder = root_folder + '/participants.tsv'

        with open(root_folder, mode='r+', encoding='utf-8-sig') as tsvfile:
            reader = DictReader(tsvfile, dialect='excel-tab')

            data = []
            for par in reader:
                for par2 in participants:
                    if par['participant_id'] == 'sub-' + par2['participant_id']:
                        par['age'] = par2['age']
                        par['sex'] = par2['sex']
                        par['hand'] = par2['hand']
                        break
                data.append(par)

            tsvfile.seek(0)
            tsvfile.truncate()

            writer = DictWriter(
                tsvfile,
                dialect='excel-tab',
                fieldnames=['participant_id', 'age', 'sex', 'hand'])
            writer.writeheader()
            for row in data:
                writer.writerow(row)
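# A usage sketch for export_bids (raw_list/bids_path_list and the participant
# entry are hypothetical placeholders; see create_bids_path for BIDSPath objects):
# export_bids(raweeg=raw_list,
#             bids_paths=bids_path_list,
#             participants=[{'participant_id': '01', 'age': 25, 'sex': 'F',
#                            'hand': 'R', 'root': '/data/bids_dataset/'}],
#             overwrite=True, verbose=False)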
Example #26
from sys import argv
from csv import DictReader, DictWriter

if len(argv) > 1:
    for filename in argv[1:]:
        try:
            with open(filename, mode="r", encoding="ISO-8859-1", newline="") as f:
                entries = [entry for entry in DictReader(f)]

                end = len(entries)
                for i in range(end // 2):
                    entries[i]["Position"], entries[end - 1 - i]["Position"] = (
                        entries[end - 1 - i]["Position"],
                        entries[i]["Position"],
                    )
        except Exception as e:
            print(e)
        else:
            with open(filename, mode="w", encoding="ISO-8859-1", newline="") as f:
                writer = DictWriter(f, fieldnames=list(entries[0].keys()))
                writer.writeheader()
                writer.writerows(reversed(entries))
                print("[OK]", filename)
else:
    print("No files given")
Example #27
    def visit_schema(self, **_) -> None:
        self.classtab = DictWriter(sys.stdout,
                                   ['Class Name', 'Parent Class', 'YAML Class Name', 'Description',
                                    'Flags', 'Slot Name', 'YAML Slot Name', 'Range', 'Card', 'Slot Description', 'URI'],
                                   dialect=self.dialect)
        self.classtab.writeheader()
Example #28
def sort_flows(read, localhost, output):
    print("Opening reader...")
    pcap = dpkt.pcapng.Reader(read)

    flow_sorter = FlowSorter(localhost=localhost)
    print("Sorting packets...")
    flow_sorter.sort(pcap)

    if not os.path.exists(output):
        try:
            os.mkdir(output)
        except IOError as e:
            print("Error creating directory: {}, {}".format(output, e))

    flows = flow_sorter.flows()

    flow_row = {}
    flow_num = 0

    with open(os.path.join(output, 'flows.csv'), 'w') as flows_file:
        flows_fields = [
            'id', 'start_time', 'end_time', 'duration', 'src_ip', 'src_port',
            'dst_ip', 'dst_port', 'total_bytes', 'total_message_bytes',
            'avg_inter_arrival_time', 'median_inter_arrival_time',
            'std_inter_arrival_time', 'num_packets', 'src_names', 'dst_names'
        ]
        packet_fields = [
            'timestamp', 'inter_arrival_time', 'seq', 'ack', 'flag_CWR',
            'flag_ECE', 'flag_URG', 'flag_ACK', 'flag_PSH', 'flag_RST',
            'flag_SYN', 'flag_FIN', 'window_size', 'urgent_pointer',
            'tcp_header_size', 'tcp_payload_size', 'ip_header_size', 'ip_id',
            'ip_tos', 'ip_df', 'ip_mf', 'ip_offset'
        ]
        flows_writer = DictWriter(flows_file, fieldnames=flows_fields)
        flows_writer.writeheader()
        flow_row = {}
        packet_row = {}
        for flow in sorted(flows.values(),
                           key=lambda flow: flow.first_timestamp()):
            flow_row['id'] = flow_num
            flow_row['src_ip'] = str(flow.src_host().ip_addr())
            flow_row['src_port'] = flow.src_host().port()
            flow_row['src_names'] = flow.src_host().original_names()
            flow_row['dst_ip'] = str(flow.dst_host().ip_addr())
            flow_row['dst_port'] = flow.dst_host().port()
            flow_row['dst_names'] = flow.dst_host().original_names()
            flow_row['total_bytes'] = flow.total_bytes()
            flow_row['total_message_bytes'] = flow.total_message_bytes()

            inter_arrival_times = flow.inter_arrival_times()
            timestamps = flow.timestamps()
            flow_row['avg_inter_arrival_time'] = inter_arrival_times.mean(
            ) if len(flow) > 1 else np.nan
            flow_row['median_inter_arrival_time'] = np.median(
                inter_arrival_times) if len(flow) > 1 else np.nan
            flow_row['std_inter_arrival_time'] = inter_arrival_times.std(
            ) if len(flow) > 1 else np.nan
            flow_row['start_time'] = timestamps[0]
            flow_row['end_time'] = timestamps[-1]
            flow_row['duration'] = timestamps[-1] - timestamps[0]
            flow_row['num_packets'] = len(flow)
            flows_writer.writerow(flow_row)
            with open(os.path.join(output, 'flow_{}.csv'.format(flow_num)),
                      'w') as packets_file:
                packet_num = 0
                packet_writer = DictWriter(packets_file, packet_fields)
                packet_writer.writeheader()
                for timestamp, packet in flow:
                    packet_row['timestamp'] = timestamp
                    if packet_num == 0:
                        packet_row['inter_arrival_time'] = np.nan
                    else:
                        packet_row['inter_arrival_time'] = inter_arrival_times[
                            packet_num - 1]
                    packet_row['seq'] = packet.data.seq
                    packet_row['ack'] = packet.data.ack
                    packet_row['flag_CWR'] = int(
                        bool(packet.data.flags & dpkt.tcp.TH_CWR))
                    packet_row['flag_ECE'] = int(
                        bool(packet.data.flags & dpkt.tcp.TH_ECE))
                    packet_row['flag_URG'] = int(
                        bool(packet.data.flags & dpkt.tcp.TH_URG))
                    packet_row['flag_ACK'] = int(
                        bool(packet.data.flags & dpkt.tcp.TH_ACK))
                    packet_row['flag_PSH'] = int(
                        bool(packet.data.flags & dpkt.tcp.TH_PUSH))
                    packet_row['flag_RST'] = int(
                        bool(packet.data.flags & dpkt.tcp.TH_RST))
                    packet_row['flag_SYN'] = int(
                        bool(packet.data.flags & dpkt.tcp.TH_SYN))
                    packet_row['flag_FIN'] = int(
                        bool(packet.data.flags & dpkt.tcp.TH_FIN))
                    packet_row['window_size'] = packet.data.win
                    packet_row['urgent_pointer'] = packet.data.urp
                    packet_row['tcp_payload_size'] = len(packet.data.data)
                    packet_row['tcp_header_size'] = len(packet.data) - len(
                        packet.data.data)
                    packet_row['ip_header_size'] = packet.hl
                    packet_row['ip_id'] = packet.id
                    packet_row['ip_tos'] = packet.tos
                    packet_row['ip_df'] = packet.df
                    packet_row['ip_mf'] = packet.mf
                    packet_row['ip_offset'] = packet.offset
                    packet_writer.writerow(packet_row)
                    packet_num += 1
            flow_num += 1
    # print "%.3f" % nltk.classify.accuracy(classifier, dev_test)
    classifier.show_most_informative_features(200)
    # classifier.prob_classify(featurize(name))

    right = 0
    total = len(dev_test)
    for ii in dev_test:
        prediction = classifier.classify(ii[0])
        if prediction == ii[1]:
            right += 1
    sys.stderr.write("Accuracy on dev: %f\n" % (float(right) / float(total)))

    if testfile is None:
        sys.stderr.write("No test file passed; stopping.\n")
    else:
        # Retrain on all data
        classifier = nltk.classify.NaiveBayesClassifier.train(dev_train + dev_test)

        # Read in test section
        test = {}
        for ii in DictReader(testfile, delimiter='\t'):
            test[ii['id']] = classifier.classify(fe.features(ii['text']))

        # Write predictions
        o = DictWriter(outfile, ['id', 'pred'])
        o.writeheader()
        for ii in sorted(test):
            o.writerow({'id': ii, 'pred': test[ii]})


                    out.append(v)

                #Remove the no data value if any
                if no_data:
                    uniques.discard(n)

                #Export to a tab delimited file
                if len(bands) > 1:
                    outNm = re.sub(".txt", "_" + str(b) + ".txt", outTxt)
                else:
                    outNm = outTxt

                with open(outNm, "w") as f:
                    dict_writer = DictWriter(f, ['id'] + sorted(
                        uniques,
                        key=lambda x: float(re.sub('^' + prefix, '', x))),
                                             extrasaction='ignore',
                                             delimiter="\t",
                                             restval="0")
                    dict_writer.writeheader()
                    for p in out:
                        dict_writer.writerow(p)

        if statistics:
            for b, s in zip(bands, stats):
                #Transform the result dictionary for the export
                out = []
                #Prepare a set to hold the unique values in the raster
                for k, v in s.items():
                    v['id'] = k
                    out.append(v)