Beispiel #1
0
def get_best_line(path="data/data.csv"):
    """Print per-key mean values for LME and CME countries.

    Reads the CSV at ``path`` and keeps the rows whose country name
    (column 1, whitespace-stripped) is among the names returned by
    ``get_country_info()``.  For every kept row, column 2 is the grouping
    key and column 3 is the value.  Rows whose country type equals 1 are
    grouped as CME, every other row as LME.  The mean value per key is
    printed for LME first, then for CME.

    Args:
        path: Location of the CSV file to read.

    Returns:
        None.  The two ``{key: mean}`` dictionaries are only printed.
    """
    df = pd.read_csv(path)
    country_name, _, country_type = get_country_info()

    # Remember the first position of every known name; this is the position
    # ``country_name.index(name)`` would return, without a scan per row.
    first_position = {}
    for position, known_name in enumerate(country_name):
        first_position.setdefault(known_name, position)

    # Address columns by position on the frame itself.  Indexing a row
    # Series with an integer (``df.iloc[i][k]``) depends on the deprecated
    # positional fallback of ``Series.__getitem__``.
    names = df.iloc[:, 1].to_numpy()
    keys = df.iloc[:, 2].to_numpy()
    values = df.iloc[:, 3].to_numpy()

    lme_values, cme_values = {}, {}
    for raw_name, key, value in zip(names, keys, values):
        name = raw_name.strip()
        if name not in first_position:
            continue
        if country_type[first_position[name]] == 1:
            # The country is CME.
            cme_values.setdefault(key, []).append(value)
        else:
            # The country is LME.
            lme_values.setdefault(key, []).append(value)

    # Plain dicts keep the printed representation as ``{key: mean, ...}``.
    LME = {key: sum(vals) / len(vals) for key, vals in lme_values.items()}
    CME = {key: sum(vals) / len(vals) for key, vals in cme_values.items()}
    print(LME)
    print(CME)
def init_output(df, yr_list, country_list):
    """Prepend one placeholder row per (country, year) pair to ``df``.

    Each new row holds the country in column 0, the year (converted with
    ``int``) in column 1, the country's type in column 2 and the string
    ``"NaN"`` in every remaining column.  Rows are ordered country by
    country, and within a country in the order of ``yr_list``.  The rows
    already in ``df`` follow the placeholders and the index is renumbered.

    Args:
        df: Frame whose columns define the output layout; it needs at
            least three columns.
        yr_list: Years to create rows for; each item must be accepted by
            ``int``.
        country_list: Countries to create rows for.  Item ``i`` is paired
            with entry ``i`` of the type list from ``get_country_info()``
            (assumes both use the same ordering -- TODO confirm with the
            callers).

    Returns:
        A new DataFrame with the placeholder rows first, or ``df`` itself
        when there is nothing to add.
    """
    _, _, country_type = get_country_info()

    n_columns = len(df.columns)
    rows = []
    for i, country in enumerate(country_list):
        for year in yr_list:
            row = ["NaN"] * n_columns
            row[0] = country
            row[1] = int(year)
            row[2] = country_type[i]
            rows.append(row)

    if not rows:
        return df

    # Build every placeholder row in one go.  ``DataFrame.append`` no longer
    # exists in pandas 2.x, and growing the frame row by row is quadratic.
    # ``dtype=object`` keeps the mixed string/int cells as they are.
    placeholder = pd.DataFrame(rows, columns=df.columns, dtype=object)
    return pd.concat([placeholder, df], ignore_index=True)
def get_target_country(
    path="/Users/jinhanmei/Desktop/PATSTAT_Sample_2018Autumn_Global_CSV/tls801_country.csv"
):
    """Return the country codes of the target countries, plus ``'US'``.

    The target names are the names from ``get_country_info()`` that come
    before the first entry whose type is 0.  The table at ``path`` is read
    with an auto-detected separator; column 0 supplies the code and
    column 2 the name of each country.  Codes are collected in table
    order for every row whose name is a target name, and ``'US'`` is
    always appended at the end.  The resulting list is printed as well.

    Args:
        path: Location of the country table.

    Returns:
        List of country codes.

    Raises:
        ValueError: If no country type equals 0.
    """
    country, _, country_type = get_country_info()
    # Keep only the names listed before the first type-0 entry.
    wanted_names = set(country[:country_type.index(0)])

    # Only the python engine can sniff the separator for ``sep=None``;
    # naming it explicitly avoids the parser fallback warning.
    df = pd.read_csv(path, sep=None, engine="python")
    ctry_code = df.iloc[:, 0].tolist()
    ctry_name = df.iloc[:, 2].tolist()

    r_lst = [
        code for code, name in zip(ctry_code, ctry_name)
        if name in wanted_names
    ]
    # 'US' is always part of the result.
    r_lst.append('US')
    print(r_lst)
    return r_lst
def check_countries_application(path):
    """Print how many granted applications since 2000 each target country has.

    The table at ``path`` is read with an auto-detected separator.  A row
    is counted when its year (column 5, converted with ``int``) is at
    least 2000, its granted flag (column 20) equals ``"Y"`` and its
    country code (column 1) is one of the codes returned by
    ``get_target_country()``.  The ``{code: count}`` dictionary is printed
    with the codes in order of first appearance.

    Args:
        path: Location of the application table.

    Returns:
        None.  The counts are only printed.
    """
    # Only the python engine can sniff the separator for ``sep=None``;
    # naming it explicitly avoids the parser fallback warning.
    df = pd.read_csv(path, sep=None, engine="python")
    target_ctry_code = set(get_target_country())

    # Walk the three relevant columns together instead of doing a chained
    # ``df[column][i]`` lookup for every cell.
    codes = df.iloc[:, 1]
    years = df.iloc[:, 5]
    granted_flags = df.iloc[:, 20]

    country_dict = {}
    for code, year, granted in zip(codes, years, granted_flags):
        # Check if the patent year is in range.
        if int(year) < 2000:
            continue
        # Check if the patent is granted.
        if granted != "Y":
            continue
        # Check if the patent country is wanted.
        if code not in target_ctry_code:
            continue
        country_dict[code] = country_dict.get(code, 0) + 1
    print(country_dict)
Beispiel #5
0
def get_plot(path="data/data.csv"):
    """Show a scatter plot of columns 2 and 3 for the known countries.

    Reads the CSV at ``path`` and keeps the rows whose country name
    (column 1, whitespace-stripped) is among the names returned by
    ``get_country_info()``; the name list is printed first.  Column 2 is
    plotted on the x axis and column 3 on the y axis.  A point is red when
    the country's type is -1, blue when it is 1 and green otherwise.

    Args:
        path: Location of the CSV file to read.

    Returns:
        None.  The figure is displayed with ``plt.show()``.
    """
    df = pd.read_csv(path)
    country_name, _, country_type = get_country_info()
    print(country_name)

    # Remember the first position of every known name; this is the position
    # ``country_name.index(name)`` would return, without a scan per row.
    first_position = {}
    for position, known_name in enumerate(country_name):
        first_position.setdefault(known_name, position)

    # Address columns by position on the frame itself.  Indexing a row
    # Series with an integer (``df.iloc[i][k]``) depends on the deprecated
    # positional fallback of ``Series.__getitem__``.
    names = df.iloc[:, 1].to_numpy()
    x_column = df.iloc[:, 2].to_numpy()
    y_column = df.iloc[:, 3].to_numpy()

    # Coordinates are collected separately so that an empty selection does
    # not end in column-indexing an empty 1-D array.
    xs, ys, colors = [], [], []
    for raw_name, x_value, y_value in zip(names, x_column, y_column):
        name = raw_name.strip()
        if name not in first_position:
            continue
        y_point = country_type[first_position[name]]
        xs.append(x_value)
        ys.append(y_value)
        colors.append("r" if y_point == -1 else "b" if y_point == 1 else "g")

    plt.title("LME CME score through Change")
    plt.scatter(xs, ys, color=colors, s=20, alpha=0.5)
    plt.show()