import csv
import os
from datetime import date, timedelta

main_metric_reader = csv.DictReader(
    open('../data/all_treated_editor_set_sort_by_date.csv', 'r'))
all_wiki_reader = csv.DictReader(
    open('../data/history_all_wikipedia_sort_by_date.csv', 'r'))
all_wiki_line = next(all_wiki_reader)
writer = csv.DictWriter(
    open('../data/all_treated_allwikireten_currentweek.csv', 'w', newline=''),
    fieldnames=[
        'ArticleId', 'RelWeek', 'SumOldEditorRetenAllWiki',
        'SumNewEditorRetenAllWiki', 'SumPreShockRetenAllWiki',
        'SumPostShockRetenAllWiki', 'SumNewWikiRetenAllWiki',
        'SumNewNonWikiRetenAllWiki', 'MeanOldEditorRetenAllWiki',
        'MeanNewEditorRetenAllWiki', 'MeanPreShockRetenAllWiki',
        'MeanPostShockRetenAllWiki', 'MeanNewWikiRetenAllWiki',
        'MeanNewNonWikiRetenAllWiki', 'MedOldEditorRetenAllWiki',
        'MedNewEditorRetenAllWiki', 'MedPreShockRetenAllWiki',
        'MedPostShockRetenAllWiki', 'MedNewWikiRetenAllWiki',
        'MedNewNonWikiRetenAllWiki', 'LogMeanOldEditorRetenAllWiki',
        'LogMeanNewEditorRetenAllWiki', 'LogMeanPreShockRetenAllWiki',
        'LogMeanPostShockRetenAllWiki', 'LogMeanNewWikiRetenAllWiki',
        'LogMeanNewNonWikiRetenAllWiki'
    ],
    extrasaction='ignore')
writer.writeheader()

CurrStartDate = date(2000, 1, 1)
CurrEndDate = CurrStartDate + timedelta(days=6)
CurrRetentionStartDate = CurrStartDate + timedelta(days=7)
CurrRetentionEndDate = CurrStartDate + timedelta(days=34)
AllWikiRevDict = {}
def writeHeader(file):
    if os.stat(file).st_size == 0:
        with open(file, "wb") as write:
            fieldnames = ["item_id", "item_name", "item_price"]
            writer = csv.DictWriter(write, fieldnames=fieldnames)
            writer.writeheader()
    def y_cruncher(self):
        needHeader = False
        if not os.path.isfile(const.datadir+'y_cruncher.csv'):
            needHeader = True
        os.system("mkdir "+const.datadir)
        with open(const.datadir+'y_cruncher.csv', 'a') as fout:
            row = OrderedDict([('instanceID', None), ('experimentID', None), ('instanceType', None),
                               ('memoryInfo', None), ('processorInfo',
                                                      None), ('sysTopology', None),
                               ('osVersion', None), ('testStartTime',
                                                     None), ('availableMemory', None),
                               ('isMultiThread', None), ('cpuUtilization',
                                                         None), ('multiCoreEfficiency', None),
                               ('computationTime', None), ('benchmarkTime',
                                                           None), ('wallTime', None)
                               ])
            # benchmarkTime = computationTime + I/O operation overhead
            writer = csv.DictWriter(fout, fieldnames=row)
            if needHeader:
                writer.writeheader()
            row['instanceType'] = self.kw['instanceType']
            row['instanceID'] = self.kw['instanceID']
            row['experimentID'] = self.kw['experimentID']
            row['wallTime'] = self.kw['duration']
            # row['testOption']=self.kw['testOption']

            for line in self.string:

                if line.find('Multi-core Efficiency') != -1:
                    obj = re.search(r'(\d*\.\d* %)', line)
                    row['multiCoreEfficiency'] = obj.group(1)
                if line.find('CPU Utilization') != -1:
                    obj = re.search(r'(\d*\.\d* %)', line)
                    row['cpuUtilization'] = obj.group(1)
                if line.find('Multi-Threading') != -1:
                    obj = re.search(r'\[01;36m(\w*)', line)
                    row['isMultiThread'] = obj.group(1)
                if line.find('Available Memory') != -1:
                    obj = re.search(r'1;33m(.*?B)', line)
                    row['availableMemory'] = obj.group(1)
                if line.find('Version') != -1:
                    obj = re.search(r'(\s+)(.*)', line)
                    row['osVersion'] = obj.group(2)
                if line.find('Topology') != -1:
                    obj = re.search(r'(\s+)(.*)', line)
                    row['sysTopology'] = obj.group(2)
                if line.find('Processor(s):') != -1:
                    obj = re.search(r'(\s+)(.*)', line)
                    row['processorInfo'] = obj.group(2)
                if line.find('Usable Memory') != -1:
                    obj = re.search(r'\((.*?B)', line)
                    row['memoryInfo'] = obj.group(1)
                if line.find('Start Time') != -1:
                    obj = re.search(
                        r'Start Time: .*?(01;33m)(.*)(\[01;37m)', line)
                    row['testStartTime'] = obj.group(2)
                if line.find('Wall Time') != -1:
                    obj = re.search(r'(\d*\.\d*).*seconds', line)
                    row['benchmarkTime'] = obj.group(1)
                if line.find('Total Computation') != -1:
                    obj = re.search(r'(\d*\.\d*).*seconds', line)
                    row['computationTime'] = obj.group(1)
                # TODO more attributes

            writer.writerow(row)
    def draw(self, screen, weather, updated):
        if weather is None or not updated:
            return

        current = weather["current"]
        daily = weather["daily"][0]

        short_summary = _(current["weather"][0]["main"])
        icon = current["weather"][0]["icon"]
        temperature = current["temp"]
        humidity = current["humidity"]
        feels_like = current["feels_like"]
        pressure = current["pressure"]
        uv_index = int(current["uvi"])
        try:
            rain_1h = current["rain"]["1h"]
        except KeyError:
            rain_1h = '0'
        windspeed = current["wind_speed"]
        try:
            windgust = current["wind_gust"]
        except KeyError:
            windgust = 'nan'
        print(windgust)
        long_summary = daily["weather"][0]["description"]
        temperature_high = daily["temp"]["max"]
        temperature_low = daily["temp"]["min"]
        heat_color = Utils.heat_color(temperature, humidity, self.units)
        uv_color = Utils.uv_color(uv_index)
        weather_icon = Utils.weather_icon(icon, self.icon_size)

        #temperature = Utils.temperature_text(int(temperature), self.units)
        temperature = Utils.temperature_text(round(temperature, 1), self.units)
        feels_like = Utils.temperature_text(int(feels_like), self.units)
        temperature_low = Utils.temperature_text(int(temperature_low),
                                                 self.units)
        temperature_high = Utils.temperature_text(int(temperature_high),
                                                  self.units)
        humidity = Utils.percentage_text(humidity)
        uv_index = str(uv_index)
        pressure = Utils.pressure_text(int(pressure))

        """
        HistoryGraphLog - log data to GraphDataLog.txt
        """
        # TODO: Add maintenance of GraphDataLog.txt for removing old data to keep file small.
        xtemperature = temperature
        xtemperature = xtemperature[:-2]
        xpressure = pressure
        xpressure = xpressure[:-2]
        xtimestamp = time.strftime('%m-%d-%Y %H:%M:%S')

        graph = "GraphDataLog.txt"
        file = open(graph, "a", newline='')

        with file:
            myfields = ['xdate', 'temp', 'press', 'rain_1h', 'windspeed', 'windgust']
            writer = csv.DictWriter(file, fieldnames=myfields)
            #writer.writeheader()
            writer.writerow({'xdate': xtimestamp, 'temp': xtemperature, 'press': xpressure, 'rain_1h': rain_1h, 'windspeed': windspeed, 'windgust': windgust})
        #file.close()
        df = pandas.read_csv(graph)

        # convert to datetime
        df['xdate'] = pandas.to_datetime(df['xdate'])
        # calculate mask
        m1 = df['xdate'] >= (pandas.to_datetime('now') - pandas.DateOffset(days=1))
        m2 = df['xdate'] <= pandas.to_datetime('now')
        #mask = m1 & m2
        mask = m1
        # output masked dataframes
        # df[~mask].to_csv('out1.csv', index=False)
        #Remove time from datetime
        #df['xdate'] = pandas.to_datetime(df['xdate']).dt.date
        df[mask].to_csv('GraphData.csv', index=False)
        """
        END GraphLog
        """


        text_x = weather_icon.get_size()[0]
        text_width = self.rect.width - text_x

        message1 = self.text_warp("{} {}".format(temperature, short_summary),
                                  text_width,
                                  "medium",
                                  bold=True,
                                  max_lines=1)[0]
        message2 = "{} {}   {} {} {} {}".format(_("Feels Like"), feels_like,
                                                _("Low"), temperature_low,
                                                _("High"), temperature_high)
        if self.text_size(message2, "small")[0] > text_width:
            message2 = "Feel {}  {} - {}".format(feels_like, temperature_low,
                                                 temperature_high)
        message3 = "{} {}  {} {}  {} {}".format(_("Humidity"), humidity,
                                                _("Pressure"), pressure,
                                                _("UVindex"), uv_index)
        if self.text_size(message3, "small")[0] > text_width:
            message3 = "{}  {}  UV {}".format(humidity, pressure, uv_index)
        max_lines = int((self.rect.height - 55) / 15)
        message4s = self.text_warp(long_summary,
                                   text_width,
                                   "small",
                                   bold=True,
                                   max_lines=max_lines)

        self.clear_surface()
        self.draw_image(weather_icon, (0, 0))
        self.draw_text(message1, (text_x, 15), "large", heat_color, bold=True)
        self.draw_text(message2, (text_x, 52), "small", "white")
        i = message3.index("UV")
        (right, _bottom) = self.draw_text(message3[:i], (text_x, 70), "small",
                                          "white")
        self.draw_text(message3[i:], (right, 70), "small", uv_color, bold=True)
        height = 70 + (15 * (max_lines - len(message4s))) / 2
        for message in message4s:
            self.draw_text(message, (text_x, height),
                           "small",
                           "blue",
                           bold=True)
            height += 15
        self.update_screen(screen)
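The TODO near the top of the logging block above asks for maintenance of GraphDataLog.txt so old rows are removed and the file stays small. A minimal sketch of such a helper (hypothetical name, assuming the xdate column written above; only the parsed copy of the dates is used, so the stored strings keep their original format):

import pandas

def prune_graph_log(path="GraphDataLog.txt", keep_days=7):
    # hypothetical helper: keep only the last keep_days of logged rows
    df = pandas.read_csv(path)
    dates = pandas.to_datetime(df['xdate'])
    cutoff = pandas.Timestamp.now() - pandas.Timedelta(days=keep_days)
    df[dates >= cutoff].to_csv(path, index=False)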
Example #5
def DictWriter(f, fieldnames):
    return csv.DictWriter(f, fieldnames, delimiter='\t', dialect='excel-tab')
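A brief usage sketch for the wrapper above (file name and rows are hypothetical, and the wrapper is assumed to be in scope). Note that delimiter='\t' is redundant with dialect='excel-tab', since that dialect already uses a tab delimiter, though passing both is harmless:

with open('report.tsv', 'w', newline='') as f:
    writer = DictWriter(f, fieldnames=['name', 'score'])
    writer.writeheader()
    writer.writerow({'name': 'alice', 'score': 10})  # columns come out tab-separated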
Example #6
def results(fname, cfg):
    with open(fname, 'w', newline='') as csvfile:
        lines = []
        fieldnames = ['ID', 'GeCo2 bytes', 'GeCo3 bytes', 'GeCo2 secs', 'GeCo3 secs', 'Mode', 'L.Rate', 'H.Nodes']
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        totalb2 = 0
        totalb3 = 0

        totals2 = 0
        totals3 = 0

        for [l, lr, hs, f] in cfg:
            if l == '16':
                cmd = ['./GeCo2', '-lr', lr, '-hs', hs, f]
                cmd[1:1] = g2l16.split()
                out = subprocess.check_output(cmd)
            else:
                out = subprocess.check_output(['./GeCo2','-l', l, '-lr', lr, '-hs', hs, f])

            sout = str(out)

            bytes2 = sout.split('Total bytes: ')[1].split()[0]
            bytes2 = int(bytes2)
            totalb2 = totalb2 + bytes2
            bytes2 = format(bytes2, ',d')

            secs2 = sout.split('Spent ')[1].split()[0]
            secs2 = float(secs2)
            totals2 = totals2 + secs2
            secs2 = str(round(secs2, 1))


            out = subprocess.check_output(['./GeCo3','-l', l, '-lr', lr, '-hs', hs, f])
            sout = str(out)
            bytes3 = sout.split('Total bytes: ')[1].split()[0]
            bytes3 = int(bytes3)
            totalb3 = totalb3 + bytes3
            bytes3 = format(bytes3, ',d')

            secs3 = sout.split('Spent ')[1].split()[0]
            secs3 = float(secs3)
            totals3 = totals3 + secs3
            secs3 = str(round(secs3, 1))

            d = {'ID': os.path.basename(f),
                 'GeCo2 bytes': bytes2, 'GeCo3 bytes': bytes3,
                 'GeCo2 secs': secs2, 'GeCo3 secs': secs3,
                 'Mode': l, 'L.Rate': lr, 'H.Nodes': hs}
            lines.append(d)
            print(d)

        for l in reversed(lines):
            writer.writerow(l)

        totalb2 = format(totalb2, ',d')
        totalb3 = format(totalb3, ',d')

        totals2 = str(round(totals2, 1))
        totals3 = str(round(totals3, 1))

        d = {'ID': 'Total',
             'GeCo2 bytes': totalb2, 'GeCo3 bytes': totalb3,
             'GeCo2 secs': totals2, 'GeCo3 secs': totals3,
             'Mode': '', 'L.Rate': '', 'H.Nodes': ''}
        writer.writerow(d)
        result = re.search(r"\((\w.+)\)",
                           user)  # captures the username as a group
        if result and result[1] not in username_list:
            username_list.append(result[1])
            username_list.sort()

    for name in username_list:
        per_user = {"Username": "", "INFO": 0, "ERROR": 0}
        per_user["Username"] = name
        for line in events:
            if re.search(r"INFO \w.* \(" + name + "\)", line):
                per_user["INFO"] = per_user.get("INFO", 0) + 1
            elif re.search(r"ERROR \w.* \(" + name + "\)", line):
                per_user["ERROR"] = per_user.get("ERROR", 0) + 1

        userdata_list.append(per_user)

#----------generate error_message.csv-----------#
keys = ["Error", "Count"]
with open("error_message.csv", "w") as error_message_report:
    writer = csv.DictWriter(error_message_report, fieldnames=keys)
    writer.writeheader()
    writer.writerows(error_sorted)

#----------generate user_statistics.csv-----#
keys = ["Username", "INFO", "ERROR"]
with open("user_statistics.csv", "w") as user_statistics_report:
    writer = csv.DictWriter(user_statistics_report, fieldnames=keys)
    writer.writeheader()
    writer.writerows(userdata_list)
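Both report files above are opened without newline=''; on Windows the csv module then emits an extra blank line after every row. A sketch of the first block with the open call the csv documentation recommends (it reuses the error_sorted list and the csv import from the script above):

keys = ["Error", "Count"]
with open("error_message.csv", "w", newline="") as error_message_report:
    writer = csv.DictWriter(error_message_report, fieldnames=keys)
    writer.writeheader()
    writer.writerows(error_sorted)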
Example #8
                # plt.ylabel(column2)
                plt.title("{0} x {1}".format(column1, column2))

                for poly_order in polyDeg:
                    coefs = np.polyfit(
                        x, y, poly_order)  # we also want to do this for 2, 3
                    f = np.poly1d(coefs)
                    #print(np.poly1d(f))
                    xs, new_line = generate_points(f, min(x), max(x))
                    #               plt.plot(xs, new_line)
                    plt.plot(xs, new_line, color="red")
                    #Uncomment this line for the pairs plot

    if not debug:
        # Note: I have spent no effort making it pretty, and recommend that you do :)
        plt.legend()
        # plt.tight_layout()
        plt.savefig("./my_pairs_plot.png")
        plt.show()  # show last so the saved figure is not blank


plotting(our_dictionary)
#plotting(different_dictionary)
#print(our_dictionary)
# split each string in the list to get a list of lists; the checkdelim function determines which delimiter is used
with open("myfile.csv", "w") as myCSV:
    w = csv.DictWriter(myCSV, our_dictionary.keys())
    w.writeheader()
    w.writerow(our_dictionary)
def write_csv(data,filename):
    with open(filename, 'w+') as outf:
        writer = csv.DictWriter(outf, data[0].keys())
        writer.writeheader()
        for row in data:
            writer.writerow(row)
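write_csv above takes its fieldnames from data[0] alone; if later rows carry keys the first row lacks, DictWriter raises ValueError. A hedged variant (hypothetical name) that collects the union of keys first and fills missing columns with restval:

import csv

def write_csv_all_keys(data, filename):
    # gather every key that appears in any row, preserving first-seen order
    fieldnames = []
    for row in data:
        for key in row:
            if key not in fieldnames:
                fieldnames.append(key)
    with open(filename, 'w', newline='') as outf:
        # restval='' fills columns that a given row does not have
        writer = csv.DictWriter(outf, fieldnames=fieldnames, restval='')
        writer.writeheader()
        writer.writerows(data)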
Example #10
infields = [
    'id', 'str_resource', 'str_description', 'website', 'meta_title',
    'meta_description', 'stage_list', 'task_list'
]

outfields = infields + ['stage_list_facet', 'task_list_facet']

with open(sys.argv[1], 'r') as infile, open(sys.argv[2], 'w') as outfile:

    # skip header row in order to use own fieldnames
    next(infile)

    # instantiate the reader and writer objects
    dr = csv.DictReader(infile, fieldnames=infields)
    dw = csv.DictWriter(outfile, fieldnames=outfields)
    dw.writeheader()

    exp = re.compile(r'\d+::([^\b])')

    # loop over the input file, writing results to output file
    for row in dr:

        # remove hash marks from URL
        m = re.search('#(.+)#', row['website'])
        if m:
            row['website'] = m.group(1)

        # remove spaces from all multivalued fields
        row['stage_list_facet'] = row['stage_list'].replace('; ', ';')
        row['task_list_facet'] = row['task_list'].replace('; ', ';')
Example #11
    uid = int(uid)
    try:
        val = float(val)
    except ValueError:
        print('非数字输入,将保留文本格式')  # "Non-numeric input; keeping it as text"

    df = pd.read_csv('rules')

    df.loc[df['用户id'] == uid, para] = val  # DataFrame.set_value() was removed from pandas; .loc does the same update

    df.to_csv('rules', index=False)

    print('已成功更改', uid, '的认领信息')  # "Successfully updated the claim info for <uid>"


filename = 'rules'
with open(filename, 'a', newline='', encoding='utf-8') as f:
    fieldnames = ['用户id', '最小单种体积', '最少总认领体积',
                  '最少总认领数', '最少做种时间',
                  '认领名次合格种子数比例', '认领名次合格种子数体积比例',
                  '认领名次魔力比例', '合格认领小组', '不合格认领分类',
                  '工资比例', '工资体积系数', '工资种子寿命系数',
                  '工资做种时间系数', '工资做种人数系数', '最多允许同伴数',
                  '最少第一认领占体积比', '备注']

    thewriter = csv.DictWriter(f, fieldnames=fieldnames)
    existed_uid = set()
    # collect all existing uids in a set to avoid duplicate rules
    rf = csv.DictReader(open(filename, encoding='utf-8'))
    for i in rf:
        existed_uid.add(i['用户id'])
Example #12
                if domain not in href:
                    if href not in link_list and href.startswith("http"):
                        print "[*] Discovered external link: %s" % href
                        link_list.append(href)

    return link_list


record_list = search_domain(domain)
link_list = []

for record in record_list:

    html_content = download_page(record)

    print "[*] Retrieved %d bytes for %s" % (len(html_content), record['url'])

    link_list = extract_external_links(html_content, link_list)

print "[*] Total external links discovered: %d" % len(link_list)

with codecs.open("%s-links.csv" % domain, "wb", encoding="utf-8") as output:

    fields = ["URL"]

    logger = csv.DictWriter(output, fieldnames=fields)
    logger.writeheader()

    for link in link_list:
        logger.writerow({"URL": link})
Example #13
outfile = open(outfilename, 'w')
# write the file data row to the first line of the output file
outfile.write(filedata)

reader = csv.DictReader(infile)
dataheaders = reader.fieldnames

parser = multiline_parser(line_captures)
field_names = parser.get_keys(dataheaders)

# also filetype
field_names = ['filetype'] + field_names

outfile.write(",".join(field_names) + "\n")
dw = csv.DictWriter(outfile,
                    fieldnames=field_names,
                    restval='',
                    extrasaction='ignore')

(contracts, invoices, orders, nabs) = (0, 0, 0, 0)
total_rows = 0
for row in reader:
    total_rows += 1
    filename = "../../" + row['txt_location']
    fileid = row['fcc_id']
    intid = int(fileid)
    url_fixed = row['file_url'].upper()
    url_fixed = url_fixed.replace("%20", " ")
    url_fixed = url_fixed.replace("%2D", "/")

    ## This is a KYW-specific naming convention, apparently.
    #print "\n\n" + row['file_url']
Example #14
def validate_and_process():
    fips_set = None

    with open(os.path.join(repo_root, 'data/county_3220.geojson')) as in_file:
        data = json.load(in_file)
        features = data['features']
        fips_list = [str(int(x['properties']['GEOID'])) for x in features]
        fips_set = set(fips_list)

    # note: we need to open the files with encoding `utf-8-sig` to correctly parse
    # the byte-order mark (BOM) of the source files
    # https://stackoverflow.com/a/49150749
    with open(os.path.join(dir_path, '_working/testing_usafacts.csv'),
              encoding='utf-8-sig') as testing_in_file, open(
                  os.path.join(dir_path, '_working/positivity_raw.csv'),
                  encoding='utf-8-sig') as positivity_in_file, open(
                      os.path.join(repo_root, 'docs/testing_usafacts.csv'),
                      'w+') as testing_out_file, open(
                          os.path.join(repo_root,
                                       'docs/testingpos_usafacts.csv'),
                          'w+') as positivity_out_file:
        testing_csv_reader = csv.DictReader(testing_in_file)
        testing_source_field_names = testing_csv_reader.fieldnames

        positivity_csv_reader = csv.DictReader(positivity_in_file)
        positivity_source_field_names = positivity_csv_reader.fieldnames

        ###!Not sure about the following error handling. The code worked without it.
        # VALIDATE: make sure testing contain yesterday's data
        #yesterday = datetime.now(pytz.timezone('US/Central')) - timedelta(days=1)
        #yesterday_source_field = yesterday.strftime('%-m/%-d/%y')
        #print(yesterday)
        #print(yesterday_source_field)
        #testing_last_date = testing_source_field_names[-1]
        #print(testing_last_date)
        #if testing_last_date != yesterday_source_field:
        #  raise ValueError("Testing do not contain yesterday's data; last date {}".format(testing_last_date))
        #  pass

        # VALIDATE: make sure positivity contain yesterday's data
        #positivity_last_date = positivity_source_field_names[-1]
        #print(positivity_last_date)
        #if positivity_last_date != yesterday_source_field:
        #  raise ValueError("Positivity do not contain yesterday's data; last date {}".format(positivity_last_date))
        #  pass

        testing_out_rows = []
        positivity_out_rows = []

        # VALIDATE: make sure all testing rows belong to a known county
        for testing_row in testing_csv_reader:
            fips = testing_row['countyFIPS']
            county_name = testing_row['County Name']
            state_abbr = testing_row['State']

            if fips not in fips_set:
                print(
                    'WARNING: Testing - Skipping unknown county based on FIPS ({}): {} County, {}'
                    .format(fips, county_name, state_abbr))
                continue

            testing_out_rows.append(testing_row)

        # VALIDATE: make sure all positivity rows belong to a known county
        for positivity_row in positivity_csv_reader:
            fips = positivity_row['countyFIPS']
            county_name = positivity_row['County Name']
            state_abbr = positivity_row['State']

            if fips not in fips_set:
                print(
                    'WARNING: Positivity - Skipping unknown county based on FIPS ({}): {} County, {}'
                    .format(fips, county_name, state_abbr))
                continue

            positivity_out_rows.append(positivity_row)
        '''
      LOAD
      '''

        out_field_names = list(testing_out_rows[0].keys())

        testing_csv_writer = csv.DictWriter(testing_out_file,
                                            fieldnames=out_field_names)
        testing_csv_writer.writeheader()
        testing_csv_writer.writerows(testing_out_rows)

        positivity_csv_writer = csv.DictWriter(positivity_out_file,
                                               fieldnames=out_field_names)
        positivity_csv_writer.writeheader()
        positivity_csv_writer.writerows(positivity_out_rows)
    print('Finished.')
INPUT = sys.argv[1]
OUTPUT = 'birthdates.csv'

BASE_URL = 'https://www.wikidata.org/w/api.php?action=wbgetentities&format=json&props=claims'
# START_AFTER = ('de', 'Nikolai_Gergow')
START_AFTER = None


def create_url(lang, name):
    return '%s&sites=%swiki&titles=%s' % (BASE_URL, lang, name)


with open(INPUT, 'r') as input_file, \
     open(OUTPUT, 'a') as outputfile:
    reader = csv.DictReader(input_file)
    writer = csv.DictWriter(outputfile,
                            fieldnames=['name', 'lang', 'birth_date'])
    writer.writeheader()

    if START_AFTER is not None:
        for line in reader:
            if (line['lang'], line['name']) == START_AFTER:
                break

    for line in reader:
        url = create_url(line['lang'], line['name'])
        print(url)

        r = requests.get(url)

        data = r.json()
Example #16
 def create(self,row,schema):
     with open(self.table_name, mode='a') as f:
         writer = csv.DictWriter(f, fieldnames=schema)
         writer.writerow(row)
        inStoreAvailability = item.get('inStoreAvailability', 'false')
        if inStoreAvailability == 'false':
            inStoreAvailability = False
        else:
            inStoreAvailability = True

        data = {
            'sku': sku,
            'name': name,
            'regularPrice': regular_price,
            'salePrice': sale_price,
            'type': typex,
            # 'upc': upc,
            'url': url,
            'image': image,
            'inStoreAvailability': inStoreAvailability
        }
        csv_writer.writerow(data)


if __name__ == '__main__':
    with open(output_filename, 'w') as fout:
        csv_writer = csv.DictWriter(
            fout,
            fieldnames=('sku', 'name', 'regularPrice', 'salePrice', 'type',
                        'url', 'image', 'inStoreAvailability'))
        xml_files = sorted(os.listdir(data_folder))
        for filename in xml_files:
            full_path = os.path.join(data_folder, filename)
            get_products_from_file(full_path, csv_writer)
	outjson.close()

	# Load the json to a Python object and write to csv

	outputfile_csv = datapath + ddate + '/' + 'irs_index_' + str(year) + '_' + udate + '.csv'

	with open(outputfile_json, 'r') as f:
		data = json.load(f)
		print(len(data.keys()))
	'''
		The json is a dictionary with one item: key=filings2011, value=list of dictionaries.
	'''	

	with open(outputfile_csv, 'w', newline='') as c:
		varnames = data['Filings' + str(year)][0].keys()
		writer = csv.DictWriter(c, varnames)
		print('---------------------')
		print('                     ')
		writer.writeheader()
		writer.writerows(data['Filings' + str(year)])
		
	year +=1

'''
# Append files together to form one dataset #

outputfile = datapath + ddate + '/' + 'irs_index_' + udate + '.json'
file1 = datapath + ddate + '/' + 'irs_index_2011_' + udate + '.json'
file2 = datapath + ddate + '/' + 'irs_index_2012_' + udate + '.json'
file3 = datapath + ddate + '/' + 'irs_index_2013_' + udate + '.json'
file4 = datapath + ddate + '/' + 'irs_index_2014_' + udate + '.json'
    # Download the page using requests
    print("Downloading %s"%url)
    r = requests.get(url, headers=headers)
    # Simple check to check if page was blocked (Usually 503)
    if r.status_code > 500:
        if "To discuss automated access to Amazon data please contact" in r.text:
            print("Page %s was blocked by Amazon. Please try using better proxies\n"%url)
        else:
            print("Page %s must have been blocked by Amazon as the status code was %d"%(url,r.status_code))
        return None
    # Pass the HTML of the page and create 
    return e.extract(r.text)

# product_data = []
with open("urls.txt",'r') as urllist, open('data.csv','w') as outfile:
    writer = csv.DictWriter(outfile, fieldnames=["title","content","date","variant","images","verified","author","rating","product","url"],quoting=csv.QUOTE_ALL)
    writer.writeheader()
    for url in urllist.readlines():
        data = scrape(url) 
        if data:
            for r in data['reviews']:
                r["product"] = data["product_title"]
                r['url'] = url
                if 'verified' in r:
                    if 'Verified Purchase' in r['verified']:
                        r['verified'] = 'Yes'
                    else:
                        r['verified'] = 'No'
                r['rating'] = r['rating'].split(' out of')[0]
                date_posted = r['date'].split('on ')[-1]
                if r['images']:
import csv  # read and write CSV data
import time
import matplotlib.pyplot as plt  # plotting library for Python
import numpy as np  # scientific computing library (Fourier transforms, linear algebra, etc.)
import ecgF as e

from decimal import getcontext  # fast, correctly rounded decimal arithmetic

# getcontext().prec = 4

with open("filter.csv", "w", newline="") as csv_file:
    # The with statement gives cleaner syntax and exception handling when
    # working with files; the csv module handles data stored in CSV files.
    csv_writer = csv.DictWriter(csv_file, fieldnames=["type", "lowf", "highf", "order"])
    csv_writer.writeheader()  # write a row with the field names (as specified in the constructor)
    info = {
        "type": "none",
        "lowf": 0.05,
        "highf": 30,
        "order": 5
    }
    csv_writer.writerow(info)

say = 2000

with open("Filtereddata.csv", "w", newline="") as csv_file:
    csv_writer = csv.DictWriter(csv_file, fieldnames=["t", "f"])
    csv_writer.writeheader()
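To read the one-row filter.csv written above back into usable settings, a small DictReader sketch (field names taken from the writer above; the numeric conversions are an assumption about how the values are used, and it relies on the csv import at the top of this example):

with open("filter.csv", newline="") as csv_file:
    reader = csv.DictReader(csv_file)
    settings = next(reader)          # the single settings row
    low_f = float(settings["lowf"])  # DictReader yields strings, so convert
    high_f = float(settings["highf"])
    order = int(settings["order"])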
Example #21
def main():

    r = np.genfromtxt('datasets/RawData_fourth.csv',
                      delimiter=',',
                      names=True,
                      case_sensitive=True,
                      dtype='int')
    t = np.genfromtxt('datasets/RawData_time_fourth.csv',
                      delimiter=',',
                      names=True,
                      case_sensitive=True,
                      dtype='float')
    obs1 = np.zeros((r['Behaviours__1'].size, 37), dtype='int')
    obs_time1 = np.zeros((t['Time__1'].size, 37), dtype='float')
    n = obs1[:, 0].size
    obs = np.zeros((n, 37), dtype='int')
    obs.fill(-1)
    obs_time = np.zeros((n, 37), dtype='float')
    animalID = np.zeros(n, int)
    targetID = np.zeros(n, int)
    for ro in range(obs1[:, 0].size):
        for col in range(36):
            obs[ro][col] = r[ro][col + 5]

    for row in range(obs[:, 0].size):
        for col in range(36):
            if (obs[row][col] == -1):
                obs[row][col] = 9

    for row in range(obs_time1[:, 0].size):
        for col in range(36):
            obs_time[row][col] = t[row][col + 5]

    for row in range(obs[:, 0].size):
        animalID[row] = r[row][0]
        targetID[row] = r[row][4]

    pos = 0
    count = 0
    e = 0
    group1 = np.zeros(27, float)
    group2 = np.zeros(27, float)
    g1 = 0
    g2 = 0
    plot_val = np.arange(27)
    error_matrix = np.zeros(54, dtype='float')
    with open('Results/MPS_per_AnimalID_300_70.csv', 'w') as csvfile:
        fieldnames = ['AnimalID', 'TargetID', 'PATH']
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        while pos != obs[:, 0].size:
            g = 0
            obs_set = np.zeros((12, 37), dtype='int')
            obs_time_set = np.zeros((12, 37), dtype='float')
            obs_set.fill(-1)
            for i in range(pos, pos + 12):
                for j in range(36):
                    obs_set[i - pos][j] = obs[i][j]
                    obs_time_set[i - pos][j] = obs_time[i][j]
            T = obs_set[0].shape[0]
            num_states = 9
            trans_mat = hmm_train.trans_prob_matrix(obs_set)
            trans_mat = np.log(trans_mat)
            emi_mat_norm = hmm_train.emission_prob_matrix(obs_set)
            emi_mat_norm[:, 36] = 0
            emi_mat = np.log(emi_mat_norm)
            emi_mat_time = hmm_train.emission_prob_matrix_time(
                obs_set, obs_time_set)
            emi_mat_time[:, 36] = 0
            emi_mat_time = np.log(emi_mat_time)
            path_set = np.empty(T, dtype='int')
            path_set.fill(-1)
            for t in range(T):
                if (emi_mat_norm[8, t] > 0.8):
                    path_set[t] = -2
                elif (emi_mat_norm[8, t] > 0.7 and emi_mat_norm[8, t] < 0.8):
                    for s in range(num_states - 1):
                        path_set[t] = np.argmax(emi_mat[:, t] +
                                                trans_mat[:, s])
                elif (emi_mat_norm[8, t] > 0.5 and emi_mat_norm[8, t] < 0.7):
                    for s in range(num_states - 1):
                        path_set[t] = np.argmin(emi_mat[:, t - 1] +
                                                trans_mat[s, s])
                else:
                    for s in range(num_states - 1):
                        path_set[t] = np.argmax(emi_mat[:, t - 1] +
                                                trans_mat[s, s] +
                                                emi_mat_time[:, t])

            path_set[36] = -2
            writer.writerow({
                'AnimalID': str(animalID[pos]),
                'TargetID': str(targetID[pos]),
                'PATH': str(path_set + 1)
            })
            '''for r in range(37):
                if(path_set[r]+1==-1):
                    g = g+1
            if(targetID[pos]==1):
                val = (36-g)/36
                group1[g1] = val
                g1 = g1 + 1
            if(targetID[pos]==2):
                val = (36-g)/36
                group2[g2] = val
                g2 = g2 + 1'''
            pos = pos + 12
def output_csv(rows):
    writer = csv.DictWriter(sys.stdout, FIELDS)
    writer.writeheader()

    for row in rows:
        writer.writerow(row)
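FIELDS is defined elsewhere in the original script; a self-contained usage sketch with a hypothetical FIELDS and a couple of made-up rows:

import csv
import sys

FIELDS = ["name", "count"]  # hypothetical stand-in for the module-level FIELDS

def output_csv(rows):
    writer = csv.DictWriter(sys.stdout, FIELDS)
    writer.writeheader()
    for row in rows:
        writer.writerow(row)

output_csv([{"name": "alpha", "count": 3}, {"name": "beta", "count": 5}])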
Example #23
 def report_csv(self, all_ops: List[str], passed: List[Optional[str]],
                experimental: List[str]) -> None:
     for schema in _all_schemas:
         if schema.domain == '' or schema.domain == 'ai.onnx':
             all_ops.append(schema.name)
             if schema.support_level == defs.OpSchema.SupportType.EXPERIMENTAL:
                 experimental.append(schema.name)
     all_ops.sort()
     nodes_path = os.path.join(
         str(os.environ.get('CSVDIR')),  # type: ignore
         'nodes.csv')  # type: ignore
     models_path = os.path.join(
         str(os.environ.get('CSVDIR')),  # type: ignore
         'models.csv')  # type: ignore
     existing_nodes: OrderedDict[str, Dict[str, str]] = OrderedDict()
     existing_models: OrderedDict[str, Dict[str, str]] = OrderedDict()
     frameworks: List[str] = []
     if os.path.isfile(nodes_path):
         with open(nodes_path) as nodes_file:
             reader = csv.DictReader(nodes_file)
             assert reader.fieldnames
             frameworks = list(reader.fieldnames)
             for row in reader:
                 op = row['Op']
                 del row['Op']
                 existing_nodes[str(op)] = row
     if os.path.isfile(models_path):
         with open(models_path) as models_file:
             reader = csv.DictReader(models_file)
             for row in reader:
                 model = row['Model']
                 del row['Model']
                 existing_models[str(model)] = row
     backend = os.environ.get('BACKEND')
     other_frameworks = frameworks[1:]
     with open(nodes_path, 'w') as nodes_file:
         if 'Op' not in frameworks:
             frameworks.append('Op')
         if backend not in frameworks:
             frameworks.append(str(backend))
         else:
             other_frameworks.remove(str(backend))
         node_writer = csv.DictWriter(nodes_file, fieldnames=frameworks)
         node_writer.writeheader()
         for node in all_ops:
             node_name = node
             if node in experimental:
                 node_name = node + ' (Experimental)'
             if node_name not in existing_nodes:
                 # Also add Skipped for other nodes
                 existing_nodes[node_name] = OrderedDict()
                 for other_framework in other_frameworks:
                     existing_nodes[node_name][other_framework] = "Skipped!"
             if node in passed:
                 existing_nodes[node_name][str(backend)] = "Passed!"
             else:
                 existing_nodes[node_name][str(backend)] = "Failed!"
         summaries: Dict[Any, Any] = dict()
         if "Summary" in existing_nodes:
             summaries = existing_nodes["Summary"]
             del existing_nodes["Summary"]
         summaries[str(backend)] = \
             f"{len(passed)}/{len(all_ops)} node tests passed"
         summaries['Op'] = 'Summary'
         for node in existing_nodes:
             existing_nodes[node]['Op'] = str(node)
             node_writer.writerow(existing_nodes[node])
         node_writer.writerow(summaries)
     with open(models_path, 'w') as models_file:
         frameworks[0] = "Model"
         model_writer = csv.DictWriter(models_file, fieldnames=frameworks)
         model_writer.writeheader()
         # Consider both buckets
         num_models = 0
         for bucket in self.models:
             for model in self.models[bucket]:  # type: ignore
                 # Both analyze and run the model on the backend
                 num_covered = 0
                 for node in self.models[bucket][model].node_coverages:
                     if node in passed:
                         num_covered += 1
                 # TODO: Identify if there are models that are being
                 # skipped/not loaded, but that are in other frameworks
                 msg = "Passed!"
                 if bucket == 'loaded':
                     if model in self.models['passed']:
                         continue
                     msg = "Failed!"
                 num_models += 1
                 if model not in existing_models:
                     # Also add Skipped for other models
                     existing_models[model] = OrderedDict()
                     for other_framework in other_frameworks:
                         existing_models[model][
                             other_framework] = "Skipped!"
                 existing_models[model][str(backend)] = str(
                     "{}/{} nodes covered: {}".format(
                         num_covered,
                         len(self.models[bucket][model].node_coverages),
                         msg))
         summaries.clear()
         if "Summary" in existing_models:
             summaries = existing_models["Summary"]
             del existing_models["Summary"]
         if str(backend) in summaries:
             del summaries[str(backend)]
         summaries[str(backend)] = "{}/{} model tests passed" \
             .format(len(self.models['passed']), num_models)
         summaries['Model'] = 'Summary'
         for model in existing_models:  # type: ignore
             existing_models[model]['Model'] = model
             model_writer.writerow(existing_models[model])
         model_writer.writerow(summaries)
     with open(
             os.path.join(
                 str(os.environ.get('CSVDIR')),  # type: ignore
                 'metadata.csv'),
             'w') as metadata_file:  # type: ignore
         metadata_writer = csv.writer(metadata_file)
         metadata_writer.writerow([
             "Latest Update",
             datetime.datetime.now().isoformat().replace('T', ' ')
         ])
def get_list_batched(entity_desc,
                     request_uri,
                     out_file_json,
                     out_file_csv,
                     field_list,
                     extra_params=None):

    result_count = 1000000  # does not matter atm, offset of 0 will always dominate
    current_offset = 0
    result_limit = 1000

    list_obtained = []

    print(f"Looping through list of {entity_desc}")
    print(f"Current Offset: {current_offset}")
    print(f"Result Limit: {result_limit}")
    print(f"Result Count: {result_count}")

    while current_offset < result_count:

        request_uri_parameterized = f"{request_uri}?limit={result_limit}&offset={current_offset}"
        if extra_params:
            request_uri_parameterized = f"{request_uri_parameterized}&{extra_params}"

        current_offset = current_offset + result_limit

        stations_resp = requests.get(request_uri_parameterized, headers=head)

        if stations_resp.status_code != 200:
            print(stations_resp)
            raise ValueError(f"Error obtaining {entity_desc} list")

        print(stations_resp.json())
        current_results = stations_resp.json()
        result_count = current_results["metadata"]["resultset"]["count"]

        current_batch_results = current_results["results"]
        list_obtained.extend(current_batch_results)
        print()
        print("------------------------")
        print(f"Completed pull via: {request_uri_parameterized}")
        print(current_results["metadata"])
        print(
            f"Successfully obtained another batch of {len(current_batch_results)} {entity_desc}"
        )
        print(f"We now have data for {len(list_obtained)} {entity_desc} total")
        print("Current Batch:")
        for cbr in current_batch_results:
            print(cbr)

        print("Looping")
        print(f"Current Offset: {current_offset}")
        print(f"Result Limit: {result_limit}")
        print(f"Result Count: {result_count}")

    print()
    print()
    print(f"Pull Complete, total stations extracted: {len(list_obtained)}")

    # Write JSON to disk
    with open(out_file_json, 'w') as f:
        json.dump(list_obtained, f)

    # Write CSV to disk
    with open(out_file_csv, 'w') as output_file:
        dict_writer = csv.DictWriter(output_file,
                                     fieldnames=field_list,
                                     delimiter="|")
        dict_writer.writeheader()
        dict_writer.writerows(list_obtained)

    return list_obtained
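The writer above uses a fixed field_list; if the API ever returns keys outside that list, DictWriter raises ValueError. A hedged standalone sketch (hypothetical function name) that drops unexpected keys instead:

import csv

def write_pipe_delimited(rows, path, field_list):
    # extrasaction='ignore' silently drops dict keys that are not in field_list
    with open(path, 'w', newline='') as output_file:
        writer = csv.DictWriter(output_file, fieldnames=field_list,
                                delimiter="|", extrasaction='ignore')
        writer.writeheader()
        writer.writerows(rows)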
def make_bayesdb_files(exp_data, analysis_params, cm_params):
  ec_cache = {}
  expfiles = json.load(open(exp_data, 'rb'))['tasbe_experimental_data']['samples']

  input_cols = []
  output_cols = []

  aparams = json.load(open(analysis_params, 'rb'))
  channels = aparams['tasbe_analysis_parameters']['channels']
  output_dir = aparams['tasbe_analysis_parameters']['output'].get('output_folder', 'output')
  label_map = (json.load(open(cm_params, 'rb'))['tasbe_color_model_parameters']['channel_parameters'])

  label_map = {matlab_sanitize(x['name']): '{}_MEFL'.format(x['label']) for x in label_map}


  print label_map

  for c in channels:
    if c not in output_cols:
      output_cols.append('{}_MEFL'.format(c))

  big_csv = []

  for file_id,f in enumerate(expfiles):
    pointfile = os.path.join(output_dir, os.path.basename(re.sub('.fcs', '_PointCloud.csv', f['file'])))
    if f['sample'] not in ec_cache:
      ec_cache[f['sample']] = ec.ExperimentalCondition("https://hub-api.sd2e.org/sparql", f['sample']).conditions
    conditions = ec_cache[f['sample']]


    for c in conditions:
      if c not in input_cols:
        input_cols.append(c)

    if 'file_id' not in input_cols:
        input_cols.append('file_id')

    this_csv = csv.DictReader(open(pointfile, 'rb'))
    for row in this_csv:
      row.update(conditions)
      row.update({'file_id':file_id})
      big_csv.append(row)

  with open(os.path.join(output_dir, 'bayesdb_data.csv'), 'wb') as bayesdb_datafile:

    print input_cols + output_cols

    writer = csv.DictWriter(bayesdb_datafile, fieldnames=input_cols + output_cols)
    writer.writeheader()
    for row in big_csv:
      writer.writerow(row)

  with open(os.path.join(output_dir, 'bayesdb_metadata.json'), 'wb') as bayesdb_metafile:
    metadata = {}
    metadata['outcome-variables'] = []
    metadata['experimental-variables'] = []
    for i in input_cols:
      metadata['experimental-variables'].append({'name': i})
    for o in output_cols:
      metadata['outcome-variables'].append({'name': o})
    json.dump(metadata, bayesdb_metafile)
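make_bayesdb_files is Python 2 code ('rb'/'wb' file modes, bare print statements); in Python 3 the CSV-writing part would open the file in text mode. A minimal sketch, assuming the same big_csv rows and column lists:

import csv

def write_bayesdb_csv(big_csv, input_cols, output_cols, path):
    # Python 3: text mode with newline='' replaces the Python 2 'wb' mode
    with open(path, 'w', newline='') as bayesdb_datafile:
        writer = csv.DictWriter(bayesdb_datafile, fieldnames=input_cols + output_cols)
        writer.writeheader()
        writer.writerows(big_csv)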
def get_timeseries_batched(entity_desc,
                           datasetid,
                           stn_id,
                           year_start,
                           year_end,
                           request_uri,
                           out_file_json,
                           out_file_csv,
                           field_list,
                           extra_params=None):

    result_count = 1000000  # does not matter atm, offset of 0 will always dominate
    current_offset = 0
    result_limit = 1000
    no_results = False

    list_obtained = []

    license_key = cdo_tokens.get(block=True)
    logger.debug(f"Using License key {license_key}")

    stn_name = STATION_LOOKUP[stn_id]["stn_name"]
    logger.info(
        f"Pulling for station {stn_id}, readings from {year_start}-{year_end}, location: {stn_name}"
    )

    try:

        for year_current in range(year_start, year_end + 1):
            no_results = False
            current_offset = 0

            head = {'token': license_key}

            while not no_results and (current_offset < result_count):
                time.sleep(1)
                request_uri_parameterized = f"{request_uri}?limit={result_limit}&offset={current_offset}&startdate={year_current}-01-01&enddate={year_current}-12-31&datasetid={datasetid}&stn_id={stn_id}"

                if extra_params:
                    request_uri_parameterized = f"{request_uri_parameterized}&{extra_params}"

                current_offset = current_offset + result_limit

                logger.debug(
                    f"\tPulling for station {stn_id} for {year_current}")

                stations_resp = requests.get(request_uri_parameterized,
                                             headers=head)

                if stations_resp.status_code != 200:
                    logger.error(
                        f"Error with request: {request_uri_parameterized}")
                    logger.error(stations_resp)
                    # We DON'T want to kill the whole process
                    #raise ValueError(f"Error obtaining {entity_desc} list")

                current_results = stations_resp.json()

                # Only process non-empty requests
                if "metadata" not in current_results:

                    no_results = True
                    logger.debug(
                        f"\tPulling for station {stn_id} for {year_current}: no results"
                    )
                else:

                    result_count = current_results["metadata"]["resultset"][
                        "count"]

                    current_batch_results = current_results["results"]
                    list_obtained.extend(current_batch_results)

                    logger.debug(
                        f"\tPulling for station {stn_id} for {year_current}: {len(list_obtained)} total results"
                    )

        logger.info(
            f"Pulling for station {stn_id}, readings from {year_start}-{year_end}, location: {stn_name}, completed with {len(list_obtained)} readings"
        )

        # Write CSV to disk
        logger.debug(
            f"Writing completed pull for station {stn_id} to {out_file_csv}")
        with open(out_file_csv, 'w') as output_file:
            dict_writer = csv.DictWriter(output_file,
                                         fieldnames=field_list,
                                         delimiter="|")
            dict_writer.writeheader()
            dict_writer.writerows(list_obtained)

    except Exception as err:
        logger.error(
            f"ERROR: Exception encountered on pull for station {stn_id}")
        logger.error(err)
        traceback.print_exc()

    cdo_tokens.put(license_key)

    return list_obtained
def update():
    print "************* UPDATE ****************"
    print "Update By \n1.ID\n2.Name"
    choice = input("Enter 1 or 2")
    if choice == 1:
        print "Enter Product ID to be updated\n"
        update_id = raw_input()
        condition = check(update_id)
        if condition == "not found":
            print "Entered ID not found\n"
            return
        print "Enter Price to be Updated\n"
        price = raw_input("Enter price\n")
        with open("inventory.csv", "rb") as read:
            reader = csv.DictReader(read)
            for i in reader:
                if i == condition:
                    with open("dummy.csv", "ab") as dummy:
                        writeHeader("dummy.csv")
                        fieldnames = ["item_id", "item_name", "item_price"]
                        writer = csv.DictWriter(dummy, fieldnames=fieldnames)
                        writer.writerow({
                            "item_id": i["item_id"],
                            "item_name": i["item_name"],
                            "item_price": price
                        })
                else:
                    with open("dummy.csv", "ab") as dummy:
                        writeHeader("dummy.csv")
                        fieldnames = ["item_id", "item_name", "item_price"]
                        writer = csv.DictWriter(dummy, fieldnames=fieldnames)
                        writer.writerow(i)

        copyfile("dummy.csv", "inventory.csv")
        os.remove("dummy.csv")
    elif choice == 2:
        print "Enter Product ID to be updated\n"
        update_name = raw_input()
        condition = check_name(update_name)
        if condition == "not found":
            print "Entered Name not found"
            return
        print "Enter Price to be Updated\n"
        price = raw_input("Enter price\n")
        with open("inventory.csv", "rb") as read:
            reader = csv.DictReader(read)
            for i in reader:
                if i == condition:
                    with open("dummy.csv", "ab") as dummy:
                        writeHeader("dummy.csv")
                        fieldnames = ["item_id", "item_name", "item_price"]
                        writer = csv.DictWriter(dummy, fieldnames=fieldnames)
                        writer.writerow({
                            "item_id": i["item_id"],
                            "item_name": i["item_name"],
                            "item_price": price
                        })
                else:
                    with open("dummy.csv", "ab") as dummy:
                        writeHeader("dummy.csv")
                        fieldnames = ["item_id", "item_name", "item_price"]
                        writer = csv.DictWriter(dummy, fieldnames=fieldnames)
                        writer.writerow(i)

        copyfile("dummy.csv", "inventory.csv")
        os.remove("dummy.csv")
    else:
        print "You have entered an incorrect option\n"
print("Number of columns with duplicates: " + str(len(key_list)))

# Cut duplicates
key_list = list(dict.fromkeys(key_list))

print("Number of columns without duplicates: " + str(len(key_list)))

print("Number of rows: " + str(len(json_list) + 1))

# That's useful for my project. It moves "file_path" and "<page title>" in front of key_list.
# If you are not me, skip this one. If you are me, damn you are awesome!
key_list.insert(0, key_list.pop(key_list.index("<page title>")))
key_list.insert(0, key_list.pop(key_list.index("file_path")))

print("Writing file " + csv_file_name)

# Write a single csv file with the content of every json found
with open(csv_file_name, mode='w') as csv_file:
    csv_writer = csv.DictWriter(csv_file, fieldnames=key_list)
    csv_writer.writeheader()
    for i in range(0, len(json_list)):
        csv_writer.writerow(json_list[i])

# And that's the end of the time counter
stop_time = time.time()

print("Execution completed. Yey. It took " +
      str("%.2f" % round((stop_time - start_time), 2)) + " seconds or, if you prefer, " +
str("%.2f" % round(((stop_time - start_time) / 60), 2)) + " minutes")
Example #29
def gp(sr, cos):
    print "Gross profit:             £", result_gp


cos(0, 0, 0)
gp(0, 0)

print " "
print "Saving data in CSV"
with open('Results.csv', 'w') as csvfile:
    fieldnames = [
        'Company', 'Sector', 'Current Liabilities', 'Non-Current Liabilities',
        'Current Assets', 'Non-Current Assests', 'Equity'
    ]
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    writer.writeheader()
    writer.writerow({
        'Company': CN1,
        'Sector': S1,
        'Current Liabilities': CL,
        'Non-Current Liabilities': NCL,
        'Current Assets': CA,
        'Non-Current Assests': NCA,
        'Equity': EQ
    })
    writer.writerow({})

    fieldnames = [
        'Company', 'Sector', 'Purchases', 'Interest Payables', 'Sales',
        'Expenses', 'Interest Receivables', 'Opening Stock', 'Closing Stock'
Example #30
                                                 keyword_location)

    return location_of_closed_keyword


with open("../../Output/" + sys.argv[1], "r") as intermediateFeaturesFile:
    intermediateFeatureReader = csv.DictReader(intermediateFeaturesFile)
    fieldnames = [
        'id', 'name', 'some_capitalized', 'atleast_one_capitalized',
        'first_letter_capitalized', 'has_suffix_salutation', 'start_position',
        'distance_to_period', 'distance_to_closest_keyword', 'frequency',
        'contains_period', 'contains_keywords', 'name_length',
        'number_of_capitals', 'distance_to_closest_eol', 'label'
    ]

    output = csv.DictWriter(open("../../Output/" + sys.argv[2], "w"),
                            fieldnames=fieldnames)
    output.writeheader()

    for row in intermediateFeatureReader:
        identifier = row['id']
        name = row['name']
        number_of_capitals = sum(1 for c in name if c.isupper())
        contains_period = 0 if name.find(".") == -1 else 1
        name_length = len(name)
        some_capitalized = 1 if number_of_capitals >= 2 else 0
        atleast_one_capitalized = 1 if number_of_capitals >= 1 else 0
        first_letter_capitalized = 1 if name[0].isupper() else 0
        has_suffix_salutations = check_suffix_salutations()
        start_position = int(row['start_position'])
        end_position = int(row['end_position'])
        label = row['label']