Example #1
    def get_all_tweets(self, screen_name, new_tweets, csv):
        """
		https://gist.github.com/yanofsky/5436496
		check that users are not duplicated, only get tweets once for a user
		"""
        #Twitter only allows access to a user's most recent 3240 tweets with this method

        #initialize a list to hold all the tweepy Tweets
        alltweets = []

        #save most recent tweets
        alltweets.extend(new_tweets)

        #save the id of the oldest tweet less one
        oldest = alltweets[-1].id - 1

        #keep grabbing tweets until there are no tweets left to grab
        while len(new_tweets) > 0:
            print "getting tweets before %s" % (oldest)

            try:
                #request the next batch of older tweets (200 is the maximum allowed count)
                new_tweets = self.twitter.user_timeline(
                    screen_name=screen_name, count=200, max_id=oldest)
            except tweepy.TweepError as e:
                print 'I just caught the exception: %s' % str(e)
                continue

            #save most recent tweets
            alltweets.extend(new_tweets)

            #update the id of the oldest tweet less one
            oldest = alltweets[-1].id - 1

            print "...%s tweets downloaded so far" % (len(alltweets))
        #transform the tweepy tweets into a 2D array that will populate the csv
        outtweets = [[
            tweet.user.id_str, tweet.user.screen_name, tweet.lang,
            tweet.id_str, tweet.created_at,
            tweet.text.encode("utf-8"), tweet.favorite_count,
            tweet.retweet_count
        ] for tweet in alltweets]
        csv.writerows(outtweets)
        return alltweets
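A minimal sketch of how the method above might be wired up. The TweetDumper class, the credential placeholders, and the output filename are illustrative assumptions, not part of the original source; the sketch assumes get_all_tweets() is pasted into the class body, that self.twitter is an authenticated tweepy.API instance, and that the csv argument is a csv.writer.

import csv
import tweepy

class TweetDumper(object):
    # hypothetical wrapper: it only supplies the self.twitter attribute the method expects
    def __init__(self, api):
        self.twitter = api
    # ... paste get_all_tweets() from Example #1 here ...

auth = tweepy.OAuthHandler("CONSUMER_KEY", "CONSUMER_SECRET")
auth.set_access_token("ACCESS_TOKEN", "ACCESS_SECRET")
api = tweepy.API(auth)

screen_name = "twitter"
# fetch the first page up front; get_all_tweets() then pages backwards via max_id
first_page = api.user_timeline(screen_name=screen_name, count=200)

with open("%s_tweets.csv" % screen_name, "wb") as f:  # Python 2 file mode, matching the example
    writer = csv.writer(f)
    writer.writerow(["user_id", "screen_name", "lang", "tweet_id",
                     "created_at", "text", "favorite_count", "retweet_count"])
    TweetDumper(api).get_all_tweets(screen_name, first_page, writer)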
Example #2
def get_qa_csv():
    with open(ROOT + "/unambiguous_links.json", "rb") as fin:
        cin = unicodecsv.DictReader(fin)
        rows = [row for row in cin]

    def normalize(s):
        return re.sub(r"<[^>]+>", "", strip_cantillation(s, strip_vowels=True))

    tanakh = random.sample([
        x for x in rows if Ref(x['Quoted Ref']).primary_category == "Tanakh"
        and Ref(x['Quoting Ref']).is_segment_level()
    ], 250)
    talmud = random.sample([
        x for x in rows if Ref(x['Quoted Ref']).primary_category == "Talmud"
        and Ref(x['Quoting Ref']).is_segment_level()
    ], 250)
    qa_rows = [{
        "Found Text":
        normalize(Ref(x['Quoted Ref']).text("he").ja().flatten_to_string()),
        "Source Text":
        "...".join(
            get_snippet_by_seg_ref(
                Ref(x['Quoting Ref']).text('he'), Ref(x['Quoted Ref']))),
        "URL":
        "https://sefaria.org/{}?p2={}".format(
            Ref(x['Quoting Ref']).url(),
            Ref(x['Quoted Ref']).url()),
        "Wrong segment (seg) / Wrong link (link)":
        ""
    } for x in (tanakh + talmud)]

    with open(ROOT + "/QA Section Links.csv", "wb") as fout:
        csv = unicodecsv.DictWriter(fout, [
            "Source Text", "Found Text", "URL",
            "Wrong segment (seg) / Wrong link (link)"
        ])
        csv.writeheader()
        csv.writerows(qa_rows)
Example #3
def get_mutation_patterns(all_reads_ids_list, max_freq_allele, germline_fasta,
                          unique_real_reads_fasta):
    mutation_patterns_dict = {}
    print type(all_reads_ids_list), len(all_reads_ids_list)
    for read_id in all_reads_ids_list:
        ref_seq_id, test_seq_id = max_freq_allele, read_id
        print ref_seq_id, type(test_seq_id), len(test_seq_id), type(
            germline_fasta), type(unique_real_reads_fasta)
        ref_seqrecord, test_seqrecord = germline_fasta[
            ref_seq_id], unique_real_reads_fasta[test_seq_id]
        ref_seq_id, test_seq_id = ref_seq_id.replace('/', '').replace(
            '*', ''), test_seq_id.replace('/', '').replace('*', '')
        out = open('%s_%s_pair.fasta' % (test_seq_id, ref_seq_id), 'w')
        SeqIO.write(ref_seqrecord, out, 'fasta')
        SeqIO.write(test_seqrecord, out, 'fasta')
        out.close()
        #my_ref_len = len(rank_germ[i])
        file_for_clustalw = '%s_%s_pair.fasta' % (test_seq_id, ref_seq_id)
        do_clustalw(file_for_clustalw)
        clustalw_result = '%s_%s_pair.aln' % (test_seq_id, ref_seq_id)
        mutation_patterns_dict = caculate_mutation_patterns(
            clustalw_result, read_id, mutation_patterns_dict)
        os.system("rm %s_%s_pair.fasta" % (test_seq_id, ref_seq_id))
        os.system("rm %s_%s_pair.aln" % (test_seq_id, ref_seq_id))
        os.system("rm %s_%s_pair.dnd" % (test_seq_id, ref_seq_id))
    mutation_patterns_group = {}
    for key, value in mutation_patterns_dict.items():  # iterate key/value pairs, not just keys
        mutation_patterns_group.setdefault(value[0], []).append(
            (key, value[0]))
    data = np.zeros((len(mutation_patterns_group),
                     len(germline_fasta[max_freq_allele])))  # np.zeros takes the shape as a single tuple
    for index, (group_number, value) in enumerate(mutation_patterns_group.items()):
        for item in value:
            position = item[1][0]
            data[index][position] += 1
    ref_seq_id_name = ref_seq_id.split('*')[0]
    mutation_patterns_file = open(
        '/zzh_gpfs02/yanmingchen/HJT-PGM/Naive/%s/%s_%s_mutation_patterns.txt'
        % (prj_name, prj_name, ref_seq_id_name), 'w')
    print data
    mutation_patterns_writer = csv.writer(mutation_patterns_file)  # the csv module itself has no writerows()
    mutation_patterns_writer.writerows(data)
    mutation_patterns_file.close()
Example #4
import csv

from pickle_load import pickle_load

if __name__ == '__main__':
    model = pickle_load('model/test_rfr.pickle')

    for i in range(0, 7):
        dataset = pickle_load(f'processed/preprocessed_test_{i}.pickle')

        print('Predicting...')
        pred = model.predict(dataset)
        print('Predicted!!')

        with open('submit.csv', 'a') as f:
            print('Writing....')
            output = csv.writer(f, lineterminator='\n')
            output.writerows([[p] for p in pred])  # write via the writer; wrap each prediction so it becomes one row
            print('Done!')
Example #5
import os
import sys
import csv
import sqlite3

base_dir = os.path.dirname(os.path.realpath(__file__))
db_path = os.path.join(base_dir, 'db/lightspeed.db')

if len(sys.argv) == 2:
    db_path = os.path.realpath(sys.argv[1])

conn = None  # so the finally block is safe even if connect() fails
try:
    conn = sqlite3.connect(db_path)
    c = conn.cursor()
    fieldnames = [
        'ID', 'Ping (ms)', 'Download (Mbit/s)', 'Upload (Mbit/s)', 'Timestamp',
        'Duration (s)', 'Error'
    ]

    csv = csv.writer(sys.stdout, delimiter=';', quoting=csv.QUOTE_MINIMAL)

    result = c.execute('SELECT * FROM measurements')

    csv.writerow(fieldnames)
    csv.writerows(result)
except sqlite3.Error as e:
    print('Error:', e.args[0])
finally:
    if conn:
        conn.close()
Example #6
#!/usr/bin/env python
import os
import sys
import csv
import sqlite3

base_dir = os.path.dirname(os.path.realpath(__file__))
db_path = os.path.join(base_dir, 'db/lightspeed.db')

if len(sys.argv) == 2:
    db_path = os.path.realpath(sys.argv[1])

conn = None  # so the finally block is safe even if connect() fails
try:
    conn = sqlite3.connect(db_path)
    c = conn.cursor()
    fieldnames = ['ID', 'Ping (ms)', 'Download (Mbit/s)',
                  'Upload (Mbit/s)', 'Timestamp', 'Duration (s)', 'Error']

    csv = csv.writer(sys.stdout, delimiter=';', quoting=csv.QUOTE_MINIMAL)

    result = c.execute('SELECT * FROM measurements')

    csv.writerow(fieldnames)
    csv.writerows(result)
except sqlite3.Error as e:
    print('Error:', e.args[0])
finally:
    if conn:
        conn.close()
Example #7
def outputPlaces(csv, places):
    #print("must output " + str(len(places)))
    print("Loading...")
    for place in places:
        csv.writerows(getPlaceData(place))
Example #8
import math
import csv
import numpy as np

def time_to_seconds(time_string):
    # reconstructed signature: the original snippet starts mid-function; it converts
    # a [[hours:]minutes:]seconds string (as printed by GNU time -v) into seconds
    split_time = time_string.split(':')
    modifier = math.pow(60, len(split_time)-1)
    seconds = 0
    for time_part in split_time:
        seconds += (float(time_part) * modifier)
        modifier /= 60
    return seconds

rows = [('seconds_wall_clock', 'kbyte_memory', 'percent_cpu', 'hours_cpu', 'tool')]
for file_path in snakemake.input:
	tool = file_path.split("_")[-2]
	wall_clocks = []
	memories = []
	percent_cpus = []
	elapsed = []
	with open(file_path) as inf:
		for line in inf:
			line = line.strip()
			if 'Elapsed (wall clock)' in line:
				wall_clocks.append(time_to_seconds(line.split()[-1]))
			if 'Maximum resident set size' in line:
				memories.append(int(line.split()[-1])) 
			if 'Percent of CPU this job got' in line:
				percent_cpus.append(int(line.split()[-1][:-1]))
	cpu_hours = (np.array(wall_clocks)/3600.)*(np.array(percent_cpus)/100.)
	m = np.argmin(cpu_hours)
	rows.append((wall_clocks[m], memories[m], percent_cpus[m], cpu_hours[m], tool))

with open(snakemake.output[0], 'w') as outf:
	csv = csv.writer(outf, delimiter='\t')
	csv.writerows(rows)
Example #9
import json
import csv

file = './data/kenpom2018.json'
with open(file) as f:
    jsonString = f.readline()
    
    data = json.loads(jsonString)

fileName = './data/ESPN_NCAA_Dict.csv'

fields=[]
with open(fileName) as f:
    reader = csv.DictReader(f)
    headers = reader.fieldnames
    fields = headers+['kenpom']
    records =[]
    for row in reader:
        data = {}
        for h in headers:
            data[h] = row[h]
        data['kenpom'] = cleanName(data['NCAA'])
        records.append(data)


outCsv = 'nameMap.csv'
with open(outCsv,'w') as f:
    csv = csv.DictWriter(f,fields)
    csv.writeheader()
    csv.writerows(records)
            
    
Example #10
    #print the file number at the beginning of each histogram
    file_number = 'file number: %s' % (number)
    print (file_number)
    
    i += 1
    keys = ['time','counter']
    data = [time.time()-timestart,i]
    #Read the histogram and print to console
    #change histogram to show integer values instead of #/cc in bins
    for key, value in alpha.histogram(number_concentration = False).items():
        #separated by commas
        data.append(value)
        keys.append(key)            
        print ("i: {}\tKey: {}\tValue: {}".format(i, key, value))
    if i == 1:
        csv.writerows([keys])
    csv.writerows([data])
    time.sleep(0.5)
"""
Other values that can be read from the OPC-N2
n = alpha.sn() #Serial Number
print(n)
alpha.read_firmware()
"""

#close the file
file_csv.close()

#Shut down the opc
alpha.off()
Example #11
def outputPlaces(csv, places):
    #print("must output " + str(len(places)))
    print("Loading...")
    for place in places:
        csv.writerows(getPlaceData(place))
Example #12
                      "ALTER TABLE raw_order ALTER COLUMN id SET DEFAULT nextval('raw_order_id_seq');"
        conn.execute(query_alter)
        print('Type of id-column updated successfully.')

with open(LAST_UPDATED_AT_PATH, 'r') as f:
    last_updated_at = f.read()

# Load data from source_db to temp .csv
engine = create_engine(f"postgresql://{source_login}:{source_pass}@{source_host}{source_port}/{source_name}")
with engine.connect() as conn:
    cursor = conn.execute(f'SELECT * FROM "order" '
                          f'WHERE updated_at > {last_updated_at} '
                          f'ORDER BY updated_at LIMIT {LIMIT_TO_LOAD};')
    with open(TEMP_CSV_PATH, 'w') as f:
        csv = csv.writer(f)
        csv.writerows(cursor)

dest_source_mapping = {
    'order_id': 'id',
    'student_id': 'student_id',
    'teacher_id': 'teacher_id',
    'stage': 'stage',
    'status': 'status',
    'created_at': 'created_at',
    'updated_at': 'updated_at',
}

# Record data from temp .csv to destination_db
engine = create_engine(f"postgresql://{dest_login}:{dest_pass}@{dest_host}{dest_port}/{dest_name}")
with engine.connect() as conn:
    with open(TEMP_CSV_PATH, 'r') as f:
Example #13
import csv
#writing to a csv file here

with open('测试python写入csv.csv', 'a',
          newline='') as csv_xie:  #without newline='', a blank line appears between the rows written to the csv
    csv = csv.writer(csv_xie)
    csv.writerow(['姓名', '年龄', '电话'])  #write a single row
    test = [('小a', '17', '110'), ('小b', '19', '120')]
    csv.writerows(test)  #write multiple rows
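A small follow-up sketch, not from the original snippet, that reads the same file back with csv.reader to check what was written; only the filename comes from the example above, and csv_du is an illustrative name.

import csv

with open('测试python写入csv.csv', 'r', newline='') as csv_du:
    for row in csv.reader(csv_du):
        print(row)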
Example #14
            )  #make sure the tens and hundreds places are accounted for
            if index >= number:
                number = index + 1

file_name = '/home/pi/OpticalParticleCounters/DATA/NovaData/NOVAdata%s.csv' % (
    number)
file_csv = open(file_name, 'w')
csv = csv.writer(file_csv, delimiter=',')
#
PM25 = 0  #initialize PM25 and PM10
PM10 = 0
counter = 0
timestart = time.time()
timer = 0
csv.writerows(
    [["Counter", "Time", "PM2.5",
      "PM10"]])  #make sure to use two square brackets when using csv.writerows

while True:
    s = ser.read(1)
    if ord(s) == int("AA", 16):
        s = ser.read(1)
        if ord(s) == int("C0", 16):
            s = ser.read(7)
            a = []
            for i in s:
                a.append(i)
            #print(a)
            pm2hb = s[0]
            pm2lb = s[1]
            pm10hb = s[2]